Commit | Line | Data |
---|---|---|
3f2304f8 SG |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * NVMe over Fabrics TCP host. | |
4 | * Copyright (c) 2018 Lightbits Labs. All rights reserved. | |
5 | */ | |
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
7 | #include <linux/module.h> | |
8 | #include <linux/init.h> | |
9 | #include <linux/slab.h> | |
10 | #include <linux/err.h> | |
427fff9a | 11 | #include <linux/crc32.h> |
3f2304f8 | 12 | #include <linux/nvme-tcp.h> |
be8e82ca | 13 | #include <linux/nvme-keyring.h> |
3f2304f8 SG |
14 | #include <net/sock.h> |
15 | #include <net/tcp.h> | |
be8e82ca | 16 | #include <net/tls.h> |
2837966a | 17 | #include <net/tls_prot.h> |
be8e82ca | 18 | #include <net/handshake.h> |
3f2304f8 | 19 | #include <linux/blk-mq.h> |
1a9460ce | 20 | #include <net/busy_poll.h> |
40e0b090 | 21 | #include <trace/events/sock.h> |
3f2304f8 SG |
22 | |
23 | #include "nvme.h" | |
24 | #include "fabrics.h" | |
25 | ||
26 | struct nvme_tcp_queue; | |
27 | ||
9912ade3 WM |
28 | /* Define the socket priority to use for connections where it is desirable |
29 | * that the NIC consider performing optimized packet processing or filtering. |
30 | * A non-zero value is sufficient to indicate general consideration of any |
31 | * possible optimization. Making it a module param allows for alternative |
32 | * values that may be unique to some NIC implementations. |
33 | */ | |
34 | static int so_priority; | |
35 | module_param(so_priority, int, 0644); | |
36 | MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); | |
37 | ||
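For context: the driver applies so_priority when it sets up each queue's socket, which happens later in this file and outside this excerpt. A minimal sketch of that pattern, assuming the kernel's sock_set_priority() helper (illustrative only, not a verbatim excerpt of tcp.c):

```c
/* Illustrative sketch -- not part of the tcp.c listing above. */
static void nvme_tcp_example_apply_so_priority(struct socket *sock)
{
	/* A non-zero priority hints the stack/NIC to prioritize this socket. */
	if (so_priority > 0)
		sock_set_priority(sock->sk, so_priority);
}
```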
0c29f9fa LF |
38 | /* |
39 | * Use the unbound workqueue for nvme_tcp_wq so that the cpu affinity |
40 | * can be set from sysfs. |
41 | */ | |
42 | static bool wq_unbound; | |
43 | module_param(wq_unbound, bool, 0644); | |
44 | MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)"); | |
45 | ||
be8e82ca HR |
46 | /* |
47 | * TLS handshake timeout | |
48 | */ | |
49 | static int tls_handshake_timeout = 10; | |
0e6c4fe7 | 50 | #ifdef CONFIG_NVME_TCP_TLS |
be8e82ca HR |
51 | module_param(tls_handshake_timeout, int, 0644); |
52 | MODULE_PARM_DESC(tls_handshake_timeout, | |
53 | "nvme TLS handshake timeout in seconds (default 10)"); | |
54 | #endif | |
55 | ||
32193789 SG |
56 | static atomic_t nvme_tcp_cpu_queues[NR_CPUS]; |
57 | ||
841aee4d CL |
58 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
59 | /* lockdep can detect a circular dependency of the form | |
60 | * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock | |
61 | * because dependencies are tracked for both nvme-tcp and user contexts. Using | |
62 | * a separate class prevents lockdep from conflating nvme-tcp socket use with | |
63 | * user-space socket API use. | |
64 | */ | |
65 | static struct lock_class_key nvme_tcp_sk_key[2]; | |
66 | static struct lock_class_key nvme_tcp_slock_key[2]; | |
67 | ||
68 | static void nvme_tcp_reclassify_socket(struct socket *sock) | |
69 | { | |
70 | struct sock *sk = sock->sk; | |
71 | ||
72 | if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) | |
73 | return; | |
74 | ||
75 | switch (sk->sk_family) { | |
76 | case AF_INET: | |
77 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", | |
78 | &nvme_tcp_slock_key[0], | |
79 | "sk_lock-AF_INET-NVME", | |
80 | &nvme_tcp_sk_key[0]); | |
81 | break; | |
82 | case AF_INET6: | |
83 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", | |
84 | &nvme_tcp_slock_key[1], | |
85 | "sk_lock-AF_INET6-NVME", | |
86 | &nvme_tcp_sk_key[1]); | |
87 | break; | |
88 | default: | |
89 | WARN_ON_ONCE(1); | |
90 | } | |
91 | } | |
92 | #else | |
93 | static void nvme_tcp_reclassify_socket(struct socket *sock) { } | |
94 | #endif | |
95 | ||
3f2304f8 SG |
96 | enum nvme_tcp_send_state { |
97 | NVME_TCP_SEND_CMD_PDU = 0, | |
98 | NVME_TCP_SEND_H2C_PDU, | |
99 | NVME_TCP_SEND_DATA, | |
100 | NVME_TCP_SEND_DDGST, | |
101 | }; | |
102 | ||
103 | struct nvme_tcp_request { | |
104 | struct nvme_request req; | |
105 | void *pdu; | |
106 | struct nvme_tcp_queue *queue; | |
107 | u32 data_len; | |
108 | u32 pdu_len; | |
109 | u32 pdu_sent; | |
c2700d28 VP |
110 | u32 h2cdata_left; |
111 | u32 h2cdata_offset; | |
3f2304f8 | 112 | u16 ttag; |
1ba2e507 | 113 | __le16 status; |
3f2304f8 | 114 | struct list_head entry; |
15ec928a | 115 | struct llist_node lentry; |
a7273d40 | 116 | __le32 ddgst; |
3f2304f8 SG |
117 | |
118 | struct bio *curr_bio; | |
119 | struct iov_iter iter; | |
120 | ||
121 | /* send state */ | |
122 | size_t offset; | |
123 | size_t data_sent; | |
124 | enum nvme_tcp_send_state state; | |
125 | }; | |
126 | ||
127 | enum nvme_tcp_queue_flags { | |
128 | NVME_TCP_Q_ALLOCATED = 0, | |
129 | NVME_TCP_Q_LIVE = 1, | |
72e5d757 | 130 | NVME_TCP_Q_POLLING = 2, |
32193789 | 131 | NVME_TCP_Q_IO_CPU_SET = 3, |
3f2304f8 SG |
132 | }; |
133 | ||
134 | enum nvme_tcp_recv_state { | |
135 | NVME_TCP_RECV_PDU = 0, | |
136 | NVME_TCP_RECV_DATA, | |
137 | NVME_TCP_RECV_DDGST, | |
138 | }; | |
139 | ||
140 | struct nvme_tcp_ctrl; | |
141 | struct nvme_tcp_queue { | |
142 | struct socket *sock; | |
143 | struct work_struct io_work; | |
144 | int io_cpu; | |
145 | ||
9ebbfe49 | 146 | struct mutex queue_lock; |
db5ad6b7 | 147 | struct mutex send_mutex; |
15ec928a | 148 | struct llist_head req_list; |
3f2304f8 SG |
149 | struct list_head send_list; |
150 | ||
151 | /* recv state */ | |
152 | void *pdu; | |
153 | int pdu_remaining; | |
154 | int pdu_offset; | |
155 | size_t data_remaining; | |
156 | size_t ddgst_remaining; | |
1a9460ce | 157 | unsigned int nr_cqe; |
3f2304f8 SG |
158 | |
159 | /* send state */ | |
160 | struct nvme_tcp_request *request; | |
161 | ||
c2700d28 | 162 | u32 maxh2cdata; |
3f2304f8 SG |
163 | size_t cmnd_capsule_len; |
164 | struct nvme_tcp_ctrl *ctrl; | |
165 | unsigned long flags; | |
166 | bool rd_enabled; | |
167 | ||
168 | bool hdr_digest; | |
169 | bool data_digest; | |
36389576 | 170 | bool tls_enabled; |
427fff9a EB |
171 | u32 rcv_crc; |
172 | u32 snd_crc; | |
3f2304f8 SG |
173 | __le32 exp_ddgst; |
174 | __le32 recv_ddgst; | |
be8e82ca HR |
175 | struct completion tls_complete; |
176 | int tls_err; | |
3f2304f8 SG |
177 | struct page_frag_cache pf_cache; |
178 | ||
179 | void (*state_change)(struct sock *); | |
180 | void (*data_ready)(struct sock *); | |
181 | void (*write_space)(struct sock *); | |
182 | }; | |
183 | ||
184 | struct nvme_tcp_ctrl { | |
185 | /* read only in the hot path */ | |
186 | struct nvme_tcp_queue *queues; | |
187 | struct blk_mq_tag_set tag_set; | |
188 | ||
189 | /* other member variables */ | |
190 | struct list_head list; | |
191 | struct blk_mq_tag_set admin_tag_set; | |
192 | struct sockaddr_storage addr; | |
193 | struct sockaddr_storage src_addr; | |
194 | struct nvme_ctrl ctrl; | |
195 | ||
196 | struct work_struct err_work; | |
197 | struct delayed_work connect_work; | |
198 | struct nvme_tcp_request async_req; | |
64861993 | 199 | u32 io_queues[HCTX_MAX_TYPES]; |
3f2304f8 SG |
200 | }; |
201 | ||
202 | static LIST_HEAD(nvme_tcp_ctrl_list); | |
203 | static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); | |
204 | static struct workqueue_struct *nvme_tcp_wq; | |
6acbd961 RF |
205 | static const struct blk_mq_ops nvme_tcp_mq_ops; |
206 | static const struct blk_mq_ops nvme_tcp_admin_mq_ops; | |
db5ad6b7 | 207 | static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); |
3f2304f8 SG |
208 | |
209 | static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) | |
210 | { | |
211 | return container_of(ctrl, struct nvme_tcp_ctrl, ctrl); | |
212 | } | |
213 | ||
214 | static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) | |
215 | { | |
216 | return queue - queue->ctrl->queues; | |
217 | } | |
218 | ||
ad95bab0 ML |
219 | static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type) |
220 | { | |
221 | switch (type) { | |
222 | case nvme_tcp_c2h_term: | |
223 | case nvme_tcp_c2h_data: | |
224 | case nvme_tcp_r2t: | |
225 | case nvme_tcp_rsp: | |
226 | return true; | |
227 | default: | |
228 | return false; | |
229 | } | |
230 | } | |
231 | ||
36389576 HR |
232 | /* |
233 | * Check if the queue is TLS encrypted | |
234 | */ | |
235 | static inline bool nvme_tcp_queue_tls(struct nvme_tcp_queue *queue) | |
236 | { | |
237 | if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) | |
238 | return 0; | |
239 | ||
240 | return queue->tls_enabled; | |
241 | } | |
242 | ||
243 | /* | |
244 | * Check if TLS is configured for the controller. | |
245 | */ | |
246 | static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl) | |
0e6c4fe7 AB |
247 | { |
248 | if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) | |
249 | return 0; | |
250 | ||
e88a7595 | 251 | return ctrl->opts->tls || ctrl->opts->concat; |
0e6c4fe7 AB |
252 | } |
253 | ||
3f2304f8 SG |
254 | static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) |
255 | { | |
256 | u32 queue_idx = nvme_tcp_queue_id(queue); | |
257 | ||
258 | if (queue_idx == 0) | |
259 | return queue->ctrl->admin_tag_set.tags[queue_idx]; | |
260 | return queue->ctrl->tag_set.tags[queue_idx - 1]; | |
261 | } | |
262 | ||
263 | static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue) | |
264 | { | |
265 | return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; | |
266 | } | |
267 | ||
268 | static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue) | |
269 | { | |
270 | return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; | |
271 | } | |
272 | ||
a3406352 SG |
273 | static inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req) |
274 | { | |
275 | return req->pdu; | |
276 | } | |
277 | ||
278 | static inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req) | |
279 | { | |
280 | /* use the tail of the command pdu space for the data pdu */ |
281 | return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) - | |
282 | sizeof(struct nvme_tcp_data_pdu); | |
283 | } | |
284 | ||
53ee9e29 | 285 | static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req) |
3f2304f8 | 286 | { |
53ee9e29 CS |
287 | if (nvme_is_fabrics(req->req.cmd)) |
288 | return NVME_TCP_ADMIN_CCSZ; | |
289 | return req->queue->cmnd_capsule_len - sizeof(struct nvme_command); | |
3f2304f8 SG |
290 | } |
291 | ||
292 | static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req) | |
293 | { | |
294 | return req == &req->queue->ctrl->async_req; | |
295 | } | |
296 | ||
297 | static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req) | |
298 | { | |
299 | struct request *rq; | |
3f2304f8 SG |
300 | |
301 | if (unlikely(nvme_tcp_async_req(req))) | |
302 | return false; /* async events don't have a request */ | |
303 | ||
304 | rq = blk_mq_rq_from_pdu(req); | |
3f2304f8 | 305 | |
25e5cb78 | 306 | return rq_data_dir(rq) == WRITE && req->data_len && |
53ee9e29 | 307 | req->data_len <= nvme_tcp_inline_data_size(req); |
3f2304f8 SG |
308 | } |
309 | ||
310 | static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req) | |
311 | { | |
312 | return req->iter.bvec->bv_page; | |
313 | } | |
314 | ||
315 | static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) | |
316 | { | |
317 | return req->iter.bvec->bv_offset + req->iter.iov_offset; | |
318 | } | |
319 | ||
320 | static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) | |
321 | { | |
ca1ff67d | 322 | return min_t(size_t, iov_iter_single_seg_count(&req->iter), |
3f2304f8 SG |
323 | req->pdu_len - req->pdu_sent); |
324 | } | |
325 | ||
3f2304f8 SG |
326 | static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req) |
327 | { | |
328 | return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ? | |
329 | req->pdu_len - req->pdu_sent : 0; | |
330 | } | |
331 | ||
332 | static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req, | |
333 | int len) | |
334 | { | |
335 | return nvme_tcp_pdu_data_left(req) <= len; | |
336 | } | |
337 | ||
338 | static void nvme_tcp_init_iter(struct nvme_tcp_request *req, | |
339 | unsigned int dir) | |
340 | { | |
341 | struct request *rq = blk_mq_rq_from_pdu(req); | |
342 | struct bio_vec *vec; | |
343 | unsigned int size; | |
0dc9edaf | 344 | int nr_bvec; |
3f2304f8 SG |
345 | size_t offset; |
346 | ||
347 | if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) { | |
348 | vec = &rq->special_vec; | |
0dc9edaf | 349 | nr_bvec = 1; |
3f2304f8 SG |
350 | size = blk_rq_payload_bytes(rq); |
351 | offset = 0; | |
352 | } else { | |
353 | struct bio *bio = req->curr_bio; | |
0dc9edaf SG |
354 | struct bvec_iter bi; |
355 | struct bio_vec bv; | |
3f2304f8 SG |
356 | |
357 | vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); | |
0dc9edaf SG |
358 | nr_bvec = 0; |
359 | bio_for_each_bvec(bv, bio, bi) { | |
360 | nr_bvec++; | |
361 | } | |
3f2304f8 SG |
362 | size = bio->bi_iter.bi_size; |
363 | offset = bio->bi_iter.bi_bvec_done; | |
364 | } | |
365 | ||
0dc9edaf | 366 | iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size); |
3f2304f8 SG |
367 | req->iter.iov_offset = offset; |
368 | } | |
369 | ||
370 | static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, | |
371 | int len) | |
372 | { | |
373 | req->data_sent += len; | |
374 | req->pdu_sent += len; | |
375 | iov_iter_advance(&req->iter, len); | |
376 | if (!iov_iter_count(&req->iter) && | |
377 | req->data_sent < req->data_len) { | |
378 | req->curr_bio = req->curr_bio->bi_next; | |
de4eda9d | 379 | nvme_tcp_init_iter(req, ITER_SOURCE); |
3f2304f8 SG |
380 | } |
381 | } | |
382 | ||
5c11f7d9 SG |
383 | static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) |
384 | { | |
385 | int ret; | |
386 | ||
387 | /* drain the send queue as much as we can... */ | |
388 | do { | |
389 | ret = nvme_tcp_try_send(queue); | |
390 | } while (ret > 0); | |
391 | } | |
392 | ||
50abcc17 | 393 | static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue) |
70f437fb KB |
394 | { |
395 | return !list_empty(&queue->send_list) || | |
3770a42b | 396 | !llist_empty(&queue->req_list); |
70f437fb KB |
397 | } |
398 | ||
50abcc17 HR |
399 | static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) |
400 | { | |
36389576 | 401 | return !nvme_tcp_queue_tls(queue) && |
50abcc17 HR |
402 | nvme_tcp_queue_has_pending(queue); |
403 | } | |
404 | ||
db5ad6b7 | 405 | static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, |
674f872b | 406 | bool last) |
3f2304f8 SG |
407 | { |
408 | struct nvme_tcp_queue *queue = req->queue; | |
db5ad6b7 | 409 | bool empty; |
3f2304f8 | 410 | |
15ec928a SG |
411 | empty = llist_add(&req->lentry, &queue->req_list) && |
412 | list_empty(&queue->send_list) && !queue->request; | |
3f2304f8 | 413 | |
db5ad6b7 SG |
414 | /* |
415 | * If we're first on the send_list, we can try to send |
416 | * directly; otherwise queue io_work. Also, only do that if we |
417 | * are on the same cpu, so we don't introduce contention. |
418 | */ | |
bb833370 | 419 | if (queue->io_cpu == raw_smp_processor_id() && |
674f872b | 420 | empty && mutex_trylock(&queue->send_mutex)) { |
5c11f7d9 | 421 | nvme_tcp_send_all(queue); |
db5ad6b7 | 422 | mutex_unlock(&queue->send_mutex); |
db5ad6b7 | 423 | } |
70f437fb | 424 | |
50abcc17 | 425 | if (last && nvme_tcp_queue_has_pending(queue)) |
70f437fb | 426 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); |
3f2304f8 SG |
427 | } |
428 | ||
15ec928a SG |
429 | static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) |
430 | { | |
431 | struct nvme_tcp_request *req; | |
432 | struct llist_node *node; | |
433 | ||
434 | for (node = llist_del_all(&queue->req_list); node; node = node->next) { | |
435 | req = llist_entry(node, struct nvme_tcp_request, lentry); | |
436 | list_add(&req->entry, &queue->send_list); | |
437 | } | |
438 | } | |
439 | ||
3f2304f8 SG |
440 | static inline struct nvme_tcp_request * |
441 | nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) | |
442 | { | |
443 | struct nvme_tcp_request *req; | |
444 | ||
3f2304f8 SG |
445 | req = list_first_entry_or_null(&queue->send_list, |
446 | struct nvme_tcp_request, entry); | |
15ec928a SG |
447 | if (!req) { |
448 | nvme_tcp_process_req_list(queue); | |
449 | req = list_first_entry_or_null(&queue->send_list, | |
450 | struct nvme_tcp_request, entry); | |
451 | if (unlikely(!req)) | |
452 | return NULL; | |
453 | } | |
3f2304f8 | 454 | |
0bf04c87 HR |
455 | list_del_init(&req->entry); |
456 | init_llist_node(&req->lentry); | |
3f2304f8 SG |
457 | return req; |
458 | } | |
459 | ||
427fff9a EB |
460 | #define NVME_TCP_CRC_SEED (~0) |
461 | ||
462 | static inline void nvme_tcp_ddgst_update(u32 *crcp, | |
463 | struct page *page, size_t off, size_t len) | |
3f2304f8 | 464 | { |
427fff9a EB |
465 | page += off / PAGE_SIZE; |
466 | off %= PAGE_SIZE; | |
467 | while (len) { | |
468 | const void *vaddr = kmap_local_page(page); | |
469 | size_t n = min(len, (size_t)PAGE_SIZE - off); | |
470 | ||
471 | *crcp = crc32c(*crcp, vaddr + off, n); | |
472 | kunmap_local(vaddr); | |
473 | page++; | |
474 | off = 0; | |
475 | len -= n; | |
476 | } | |
3f2304f8 SG |
477 | } |
478 | ||
427fff9a | 479 | static inline __le32 nvme_tcp_ddgst_final(u32 crc) |
3f2304f8 | 480 | { |
427fff9a | 481 | return cpu_to_le32(~crc); |
3f2304f8 SG |
482 | } |
483 | ||
427fff9a | 484 | static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len) |
3f2304f8 | 485 | { |
427fff9a EB |
486 | return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len)); |
487 | } | |
3f2304f8 | 488 | |
427fff9a EB |
489 | static inline void nvme_tcp_set_hdgst(void *pdu, size_t len) |
490 | { | |
491 | *(__le32 *)(pdu + len) = nvme_tcp_hdgst(pdu, len); | |
3f2304f8 SG |
492 | } |
493 | ||
494 | static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, | |
495 | void *pdu, size_t pdu_len) | |
496 | { | |
497 | struct nvme_tcp_hdr *hdr = pdu; | |
498 | __le32 recv_digest; | |
499 | __le32 exp_digest; | |
500 | ||
501 | if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) { | |
502 | dev_err(queue->ctrl->ctrl.device, | |
503 | "queue %d: header digest flag is cleared\n", | |
504 | nvme_tcp_queue_id(queue)); | |
505 | return -EPROTO; | |
506 | } | |
507 | ||
508 | recv_digest = *(__le32 *)(pdu + hdr->hlen); | |
427fff9a | 509 | exp_digest = nvme_tcp_hdgst(pdu, pdu_len); |
3f2304f8 SG |
510 | if (recv_digest != exp_digest) { |
511 | dev_err(queue->ctrl->ctrl.device, | |
512 | "header digest error: recv %#x expected %#x\n", | |
513 | le32_to_cpu(recv_digest), le32_to_cpu(exp_digest)); | |
514 | return -EIO; | |
515 | } | |
516 | ||
517 | return 0; | |
518 | } | |
519 | ||
520 | static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu) | |
521 | { | |
522 | struct nvme_tcp_hdr *hdr = pdu; | |
523 | u8 digest_len = nvme_tcp_hdgst_len(queue); | |
524 | u32 len; | |
525 | ||
526 | len = le32_to_cpu(hdr->plen) - hdr->hlen - | |
527 | ((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0); | |
528 | ||
529 | if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) { | |
530 | dev_err(queue->ctrl->ctrl.device, | |
531 | "queue %d: data digest flag is cleared\n", | |
532 | nvme_tcp_queue_id(queue)); | |
533 | return -EPROTO; | |
534 | } | |
427fff9a | 535 | queue->rcv_crc = NVME_TCP_CRC_SEED; |
3f2304f8 SG |
536 | |
537 | return 0; | |
538 | } | |
539 | ||
540 | static void nvme_tcp_exit_request(struct blk_mq_tag_set *set, | |
541 | struct request *rq, unsigned int hctx_idx) | |
542 | { | |
543 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
544 | ||
545 | page_frag_free(req->pdu); | |
546 | } | |
547 | ||
548 | static int nvme_tcp_init_request(struct blk_mq_tag_set *set, | |
549 | struct request *rq, unsigned int hctx_idx, | |
550 | unsigned int numa_node) | |
551 | { | |
06427ca0 | 552 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); |
3f2304f8 | 553 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); |
f4b9e6c9 | 554 | struct nvme_tcp_cmd_pdu *pdu; |
3f2304f8 SG |
555 | int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; |
556 | struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx]; | |
557 | u8 hdgst = nvme_tcp_hdgst_len(queue); | |
558 | ||
559 | req->pdu = page_frag_alloc(&queue->pf_cache, | |
560 | sizeof(struct nvme_tcp_cmd_pdu) + hdgst, | |
561 | GFP_KERNEL | __GFP_ZERO); | |
562 | if (!req->pdu) | |
563 | return -ENOMEM; | |
564 | ||
f4b9e6c9 | 565 | pdu = req->pdu; |
3f2304f8 SG |
566 | req->queue = queue; |
567 | nvme_req(rq)->ctrl = &ctrl->ctrl; | |
f4b9e6c9 | 568 | nvme_req(rq)->cmd = &pdu->cmd; |
0bf04c87 HR |
569 | init_llist_node(&req->lentry); |
570 | INIT_LIST_HEAD(&req->entry); | |
3f2304f8 SG |
571 | |
572 | return 0; | |
573 | } | |
574 | ||
575 | static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, | |
576 | unsigned int hctx_idx) | |
577 | { | |
06427ca0 | 578 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); |
3f2304f8 SG |
579 | struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1]; |
580 | ||
581 | hctx->driver_data = queue; | |
582 | return 0; | |
583 | } | |
584 | ||
585 | static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, | |
586 | unsigned int hctx_idx) | |
587 | { | |
06427ca0 | 588 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); |
3f2304f8 SG |
589 | struct nvme_tcp_queue *queue = &ctrl->queues[0]; |
590 | ||
591 | hctx->driver_data = queue; | |
592 | return 0; | |
593 | } | |
594 | ||
595 | static enum nvme_tcp_recv_state | |
596 | nvme_tcp_recv_state(struct nvme_tcp_queue *queue) | |
597 | { | |
598 | return (queue->pdu_remaining) ? NVME_TCP_RECV_PDU : | |
599 | (queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST : | |
600 | NVME_TCP_RECV_DATA; | |
601 | } | |
602 | ||
603 | static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue) | |
604 | { | |
605 | queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) + | |
606 | nvme_tcp_hdgst_len(queue); | |
607 | queue->pdu_offset = 0; | |
608 | queue->data_remaining = -1; | |
609 | queue->ddgst_remaining = 0; | |
610 | } | |
611 | ||
612 | static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) | |
613 | { | |
614 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) | |
615 | return; | |
616 | ||
236187c4 | 617 | dev_warn(ctrl->device, "starting error recovery\n"); |
97b2512a | 618 | queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); |
3f2304f8 SG |
619 | } |
620 | ||
621 | static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, | |
622 | struct nvme_completion *cqe) | |
623 | { | |
1ba2e507 | 624 | struct nvme_tcp_request *req; |
3f2304f8 SG |
625 | struct request *rq; |
626 | ||
e7006de6 | 627 | rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id); |
3f2304f8 SG |
628 | if (!rq) { |
629 | dev_err(queue->ctrl->ctrl.device, | |
e7006de6 SG |
630 | "got bad cqe.command_id %#x on queue %d\n", |
631 | cqe->command_id, nvme_tcp_queue_id(queue)); | |
3f2304f8 SG |
632 | nvme_tcp_error_recovery(&queue->ctrl->ctrl); |
633 | return -EINVAL; | |
634 | } | |
635 | ||
1ba2e507 DW |
636 | req = blk_mq_rq_to_pdu(rq); |
637 | if (req->status == cpu_to_le16(NVME_SC_SUCCESS)) | |
638 | req->status = cqe->status; | |
639 | ||
640 | if (!nvme_try_complete_req(rq, req->status, cqe->result)) | |
ff029451 | 641 | nvme_complete_rq(rq); |
1a9460ce | 642 | queue->nr_cqe++; |
3f2304f8 SG |
643 | |
644 | return 0; | |
645 | } | |
646 | ||
647 | static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, | |
648 | struct nvme_tcp_data_pdu *pdu) | |
649 | { | |
650 | struct request *rq; | |
651 | ||
e7006de6 | 652 | rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); |
3f2304f8 SG |
653 | if (!rq) { |
654 | dev_err(queue->ctrl->ctrl.device, | |
e7006de6 SG |
655 | "got bad c2hdata.command_id %#x on queue %d\n", |
656 | pdu->command_id, nvme_tcp_queue_id(queue)); | |
3f2304f8 SG |
657 | return -ENOENT; |
658 | } | |
659 | ||
660 | if (!blk_rq_payload_bytes(rq)) { | |
661 | dev_err(queue->ctrl->ctrl.device, | |
662 | "queue %d tag %#x unexpected data\n", | |
663 | nvme_tcp_queue_id(queue), rq->tag); | |
664 | return -EIO; | |
665 | } | |
666 | ||
667 | queue->data_remaining = le32_to_cpu(pdu->data_length); | |
668 | ||
602d674c SG |
669 | if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS && |
670 | unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) { | |
671 | dev_err(queue->ctrl->ctrl.device, | |
672 | "queue %d tag %#x SUCCESS set but not last PDU\n", | |
673 | nvme_tcp_queue_id(queue), rq->tag); | |
674 | nvme_tcp_error_recovery(&queue->ctrl->ctrl); | |
675 | return -EPROTO; | |
676 | } | |
677 | ||
3f2304f8 | 678 | return 0; |
3f2304f8 SG |
679 | } |
680 | ||
681 | static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, | |
682 | struct nvme_tcp_rsp_pdu *pdu) | |
683 | { | |
684 | struct nvme_completion *cqe = &pdu->cqe; | |
685 | int ret = 0; | |
686 | ||
687 | /* | |
688 | * AEN requests are special as they don't time out and can | |
689 | * survive any kind of queue freeze and often don't respond to | |
690 | * aborts. We don't even bother to allocate a struct request | |
691 | * for them but rather special case them here. | |
692 | */ | |
58a8df67 IR |
693 | if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue), |
694 | cqe->command_id))) | |
3f2304f8 SG |
695 | nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, |
696 | &cqe->result); | |
697 | else | |
698 | ret = nvme_tcp_process_nvme_cqe(queue, cqe); | |
699 | ||
700 | return ret; | |
701 | } | |
702 | ||
c2700d28 | 703 | static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req) |
3f2304f8 | 704 | { |
a3406352 | 705 | struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req); |
3f2304f8 SG |
706 | struct nvme_tcp_queue *queue = req->queue; |
707 | struct request *rq = blk_mq_rq_from_pdu(req); | |
c2700d28 | 708 | u32 h2cdata_sent = req->pdu_len; |
3f2304f8 SG |
709 | u8 hdgst = nvme_tcp_hdgst_len(queue); |
710 | u8 ddgst = nvme_tcp_ddgst_len(queue); | |
711 | ||
1d3ef9c3 VP |
712 | req->state = NVME_TCP_SEND_H2C_PDU; |
713 | req->offset = 0; | |
c2700d28 | 714 | req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata); |
3f2304f8 | 715 | req->pdu_sent = 0; |
c2700d28 VP |
716 | req->h2cdata_left -= req->pdu_len; |
717 | req->h2cdata_offset += h2cdata_sent; | |
3f2304f8 | 718 | |
3f2304f8 SG |
719 | memset(data, 0, sizeof(*data)); |
720 | data->hdr.type = nvme_tcp_h2c_data; | |
c2700d28 VP |
721 | if (!req->h2cdata_left) |
722 | data->hdr.flags = NVME_TCP_F_DATA_LAST; | |
3f2304f8 SG |
723 | if (queue->hdr_digest) |
724 | data->hdr.flags |= NVME_TCP_F_HDGST; | |
725 | if (queue->data_digest) | |
726 | data->hdr.flags |= NVME_TCP_F_DDGST; | |
727 | data->hdr.hlen = sizeof(*data); | |
728 | data->hdr.pdo = data->hdr.hlen + hdgst; | |
729 | data->hdr.plen = | |
730 | cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); | |
c2700d28 | 731 | data->ttag = req->ttag; |
e7006de6 | 732 | data->command_id = nvme_cid(rq); |
c2700d28 | 733 | data->data_offset = cpu_to_le32(req->h2cdata_offset); |
3f2304f8 | 734 | data->data_length = cpu_to_le32(req->pdu_len); |
3f2304f8 SG |
735 | } |
736 | ||
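As a concrete check of the plen arithmetic in nvme_tcp_setup_h2c_data_pdu() above (illustrative only; it assumes both header and data digests are enabled and an 8 KiB H2CData chunk):

```c
#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t hlen = 24;	/* sizeof(struct nvme_tcp_data_pdu), per the NVMe/TCP PDU layout */
	const uint32_t hdgst = 4;	/* NVME_TCP_DIGEST_LENGTH */
	const uint32_t ddgst = 4;
	const uint32_t pdu_len = 8192;	/* one H2CData chunk, capped by maxh2cdata */

	assert(hlen + hdgst == 28);			/* pdo: payload starts after header + HDGST */
	assert(hlen + hdgst + pdu_len + ddgst == 8224);	/* plen: total bytes of this PDU on the wire */
	return 0;
}
```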
737 | static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, | |
738 | struct nvme_tcp_r2t_pdu *pdu) | |
739 | { | |
740 | struct nvme_tcp_request *req; | |
741 | struct request *rq; | |
1d3ef9c3 | 742 | u32 r2t_length = le32_to_cpu(pdu->r2t_length); |
c2700d28 | 743 | u32 r2t_offset = le32_to_cpu(pdu->r2t_offset); |
3f2304f8 | 744 | |
e7006de6 | 745 | rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); |
3f2304f8 SG |
746 | if (!rq) { |
747 | dev_err(queue->ctrl->ctrl.device, | |
e7006de6 SG |
748 | "got bad r2t.command_id %#x on queue %d\n", |
749 | pdu->command_id, nvme_tcp_queue_id(queue)); | |
3f2304f8 SG |
750 | return -ENOENT; |
751 | } | |
752 | req = blk_mq_rq_to_pdu(rq); | |
753 | ||
1d3ef9c3 VP |
754 | if (unlikely(!r2t_length)) { |
755 | dev_err(queue->ctrl->ctrl.device, | |
756 | "req %d r2t len is %u, probably a bug...\n", | |
757 | rq->tag, r2t_length); | |
758 | return -EPROTO; | |
759 | } | |
3f2304f8 | 760 | |
1d3ef9c3 VP |
761 | if (unlikely(req->data_sent + r2t_length > req->data_len)) { |
762 | dev_err(queue->ctrl->ctrl.device, | |
763 | "req %d r2t len %u exceeded data len %u (%zu sent)\n", | |
764 | rq->tag, r2t_length, req->data_len, req->data_sent); | |
765 | return -EPROTO; | |
766 | } | |
767 | ||
c2700d28 | 768 | if (unlikely(r2t_offset < req->data_sent)) { |
1d3ef9c3 VP |
769 | dev_err(queue->ctrl->ctrl.device, |
770 | "req %d unexpected r2t offset %u (expected %zu)\n", | |
c2700d28 | 771 | rq->tag, r2t_offset, req->data_sent); |
1d3ef9c3 VP |
772 | return -EPROTO; |
773 | } | |
3f2304f8 | 774 | |
0bf04c87 HR |
775 | if (llist_on_list(&req->lentry) || |
776 | !list_empty(&req->entry)) { | |
777 | dev_err(queue->ctrl->ctrl.device, | |
778 | "req %d unexpected r2t while processing request\n", | |
779 | rq->tag); | |
780 | return -EPROTO; | |
781 | } | |
782 | ||
c2700d28 VP |
783 | req->pdu_len = 0; |
784 | req->h2cdata_left = r2t_length; | |
785 | req->h2cdata_offset = r2t_offset; | |
786 | req->ttag = pdu->ttag; | |
787 | ||
788 | nvme_tcp_setup_h2c_data_pdu(req); | |
674f872b HR |
789 | |
790 | llist_add(&req->lentry, &queue->req_list); | |
791 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); | |
3f2304f8 SG |
792 | |
793 | return 0; | |
794 | } | |
795 | ||
84e00904 ML |
796 | static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, |
797 | struct nvme_tcp_term_pdu *pdu) | |
798 | { | |
799 | u16 fes; | |
800 | const char *msg; | |
801 | u32 plen = le32_to_cpu(pdu->hdr.plen); | |
802 | ||
803 | static const char * const msg_table[] = { | |
804 | [NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field", | |
805 | [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", | |
806 | [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", | |
807 | [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", | |
afb41b08 | 808 | [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded", |
84e00904 ML |
809 | [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", |
810 | }; | |
811 | ||
812 | if (plen < NVME_TCP_MIN_C2HTERM_PLEN || | |
813 | plen > NVME_TCP_MAX_C2HTERM_PLEN) { | |
814 | dev_err(queue->ctrl->ctrl.device, | |
815 | "Received a malformed C2HTermReq PDU (plen = %u)\n", | |
816 | plen); | |
817 | return; | |
818 | } | |
819 | ||
820 | fes = le16_to_cpu(pdu->fes); | |
821 | if (fes && fes < ARRAY_SIZE(msg_table)) | |
822 | msg = msg_table[fes]; | |
823 | else | |
824 | msg = "Unknown"; | |
825 | ||
826 | dev_err(queue->ctrl->ctrl.device, | |
827 | "Received C2HTermReq (FES = %s)\n", msg); | |
828 | } | |
829 | ||
3f2304f8 SG |
830 | static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, |
831 | unsigned int *offset, size_t *len) | |
832 | { | |
833 | struct nvme_tcp_hdr *hdr; | |
834 | char *pdu = queue->pdu; | |
835 | size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining); | |
836 | int ret; | |
837 | ||
838 | ret = skb_copy_bits(skb, *offset, | |
839 | &pdu[queue->pdu_offset], rcv_len); | |
840 | if (unlikely(ret)) | |
841 | return ret; | |
842 | ||
843 | queue->pdu_remaining -= rcv_len; | |
844 | queue->pdu_offset += rcv_len; | |
845 | *offset += rcv_len; | |
846 | *len -= rcv_len; | |
847 | if (queue->pdu_remaining) | |
848 | return 0; | |
849 | ||
850 | hdr = queue->pdu; | |
ad95bab0 ML |
851 | if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) { |
852 | if (!nvme_tcp_recv_pdu_supported(hdr->type)) | |
853 | goto unsupported_pdu; | |
854 | ||
855 | dev_err(queue->ctrl->ctrl.device, | |
856 | "pdu type %d has unexpected header length (%d)\n", | |
857 | hdr->type, hdr->hlen); | |
858 | return -EPROTO; | |
859 | } | |
860 | ||
84e00904 ML |
861 | if (unlikely(hdr->type == nvme_tcp_c2h_term)) { |
862 | /* | |
863 | * C2HTermReq never includes Header or Data digests. | |
864 | * Skip the checks. | |
865 | */ | |
866 | nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu); | |
867 | return -EINVAL; | |
868 | } | |
869 | ||
3f2304f8 SG |
870 | if (queue->hdr_digest) { |
871 | ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); | |
872 | if (unlikely(ret)) | |
873 | return ret; | |
874 | } | |
875 | ||
876 | ||
877 | if (queue->data_digest) { | |
878 | ret = nvme_tcp_check_ddgst(queue, queue->pdu); | |
879 | if (unlikely(ret)) | |
880 | return ret; | |
881 | } | |
882 | ||
883 | switch (hdr->type) { | |
884 | case nvme_tcp_c2h_data: | |
6be18260 | 885 | return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu); |
3f2304f8 SG |
886 | case nvme_tcp_rsp: |
887 | nvme_tcp_init_recv_ctx(queue); | |
6be18260 | 888 | return nvme_tcp_handle_comp(queue, (void *)queue->pdu); |
3f2304f8 SG |
889 | case nvme_tcp_r2t: |
890 | nvme_tcp_init_recv_ctx(queue); | |
6be18260 | 891 | return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); |
3f2304f8 | 892 | default: |
ad95bab0 | 893 | goto unsupported_pdu; |
3f2304f8 | 894 | } |
ad95bab0 ML |
895 | |
896 | unsupported_pdu: | |
897 | dev_err(queue->ctrl->ctrl.device, | |
898 | "unsupported pdu type (%d)\n", hdr->type); | |
899 | return -EINVAL; | |
3f2304f8 SG |
900 | } |
901 | ||
988aef9e | 902 | static inline void nvme_tcp_end_request(struct request *rq, u16 status) |
602d674c SG |
903 | { |
904 | union nvme_result res = {}; | |
905 | ||
2eb81a33 | 906 | if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res)) |
ff029451 | 907 | nvme_complete_rq(rq); |
602d674c SG |
908 | } |
909 | ||
3f2304f8 SG |
910 | static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, |
911 | unsigned int *offset, size_t *len) | |
912 | { | |
913 | struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; | |
3b01a9d0 | 914 | struct request *rq = |
e7006de6 | 915 | nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id); |
3b01a9d0 | 916 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); |
3f2304f8 SG |
917 | |
918 | while (true) { | |
919 | int recv_len, ret; | |
920 | ||
921 | recv_len = min_t(size_t, *len, queue->data_remaining); | |
922 | if (!recv_len) | |
923 | break; | |
924 | ||
925 | if (!iov_iter_count(&req->iter)) { | |
926 | req->curr_bio = req->curr_bio->bi_next; | |
927 | ||
928 | /* | |
929 | * If we don't have any bios it means that the controller |
930 | * sent more data than we requested, hence error |
931 | */ | |
932 | if (!req->curr_bio) { | |
933 | dev_err(queue->ctrl->ctrl.device, | |
934 | "queue %d no space in request %#x", | |
935 | nvme_tcp_queue_id(queue), rq->tag); | |
936 | nvme_tcp_init_recv_ctx(queue); | |
937 | return -EIO; | |
938 | } | |
de4eda9d | 939 | nvme_tcp_init_iter(req, ITER_DEST); |
3f2304f8 SG |
940 | } |
941 | ||
942 | /* we can read only from what is left in this bio */ | |
943 | recv_len = min_t(size_t, recv_len, | |
944 | iov_iter_count(&req->iter)); | |
945 | ||
946 | if (queue->data_digest) | |
427fff9a EB |
947 | ret = skb_copy_and_crc32c_datagram_iter(skb, *offset, |
948 | &req->iter, recv_len, &queue->rcv_crc); | |
3f2304f8 SG |
949 | else |
950 | ret = skb_copy_datagram_iter(skb, *offset, | |
951 | &req->iter, recv_len); | |
952 | if (ret) { | |
953 | dev_err(queue->ctrl->ctrl.device, | |
954 | "queue %d failed to copy request %#x data", | |
955 | nvme_tcp_queue_id(queue), rq->tag); | |
956 | return ret; | |
957 | } | |
958 | ||
959 | *len -= recv_len; | |
960 | *offset += recv_len; | |
961 | queue->data_remaining -= recv_len; | |
962 | } | |
963 | ||
964 | if (!queue->data_remaining) { | |
965 | if (queue->data_digest) { | |
427fff9a | 966 | queue->exp_ddgst = nvme_tcp_ddgst_final(queue->rcv_crc); |
3f2304f8 SG |
967 | queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; |
968 | } else { | |
1a9460ce | 969 | if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { |
1ba2e507 DW |
970 | nvme_tcp_end_request(rq, |
971 | le16_to_cpu(req->status)); | |
1a9460ce SG |
972 | queue->nr_cqe++; |
973 | } | |
3f2304f8 SG |
974 | nvme_tcp_init_recv_ctx(queue); |
975 | } | |
976 | } | |
977 | ||
978 | return 0; | |
979 | } | |
980 | ||
981 | static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, | |
982 | struct sk_buff *skb, unsigned int *offset, size_t *len) | |
983 | { | |
602d674c | 984 | struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; |
3f2304f8 SG |
985 | char *ddgst = (char *)&queue->recv_ddgst; |
986 | size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); | |
987 | off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; | |
988 | int ret; | |
989 | ||
990 | ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len); | |
991 | if (unlikely(ret)) | |
992 | return ret; | |
993 | ||
994 | queue->ddgst_remaining -= recv_len; | |
995 | *offset += recv_len; | |
996 | *len -= recv_len; | |
997 | if (queue->ddgst_remaining) | |
998 | return 0; | |
999 | ||
1000 | if (queue->recv_ddgst != queue->exp_ddgst) { | |
1ba2e507 DW |
1001 | struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), |
1002 | pdu->command_id); | |
1003 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
1004 | ||
1005 | req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR); | |
1006 | ||
3f2304f8 SG |
1007 | dev_err(queue->ctrl->ctrl.device, |
1008 | "data digest error: recv %#x expected %#x\n", | |
1009 | le32_to_cpu(queue->recv_ddgst), | |
1010 | le32_to_cpu(queue->exp_ddgst)); | |
3f2304f8 SG |
1011 | } |
1012 | ||
602d674c | 1013 | if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { |
e7006de6 SG |
1014 | struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), |
1015 | pdu->command_id); | |
1ba2e507 | 1016 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); |
602d674c | 1017 | |
1ba2e507 | 1018 | nvme_tcp_end_request(rq, le16_to_cpu(req->status)); |
1a9460ce | 1019 | queue->nr_cqe++; |
602d674c SG |
1020 | } |
1021 | ||
3f2304f8 SG |
1022 | nvme_tcp_init_recv_ctx(queue); |
1023 | return 0; | |
1024 | } | |
1025 | ||
1026 | static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, | |
1027 | unsigned int offset, size_t len) | |
1028 | { | |
1029 | struct nvme_tcp_queue *queue = desc->arg.data; | |
1030 | size_t consumed = len; | |
1031 | int result; | |
1032 | ||
aeacfcef CL |
1033 | if (unlikely(!queue->rd_enabled)) |
1034 | return -EFAULT; | |
1035 | ||
3f2304f8 SG |
1036 | while (len) { |
1037 | switch (nvme_tcp_recv_state(queue)) { | |
1038 | case NVME_TCP_RECV_PDU: | |
1039 | result = nvme_tcp_recv_pdu(queue, skb, &offset, &len); | |
1040 | break; | |
1041 | case NVME_TCP_RECV_DATA: | |
1042 | result = nvme_tcp_recv_data(queue, skb, &offset, &len); | |
1043 | break; | |
1044 | case NVME_TCP_RECV_DDGST: | |
1045 | result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len); | |
1046 | break; | |
1047 | default: | |
1048 | result = -EFAULT; | |
1049 | } | |
1050 | if (result) { | |
1051 | dev_err(queue->ctrl->ctrl.device, | |
1052 | "receive failed: %d\n", result); | |
1053 | queue->rd_enabled = false; | |
1054 | nvme_tcp_error_recovery(&queue->ctrl->ctrl); | |
1055 | return result; | |
1056 | } | |
1057 | } | |
1058 | ||
1059 | return consumed; | |
1060 | } | |
1061 | ||
1062 | static void nvme_tcp_data_ready(struct sock *sk) | |
1063 | { | |
1064 | struct nvme_tcp_queue *queue; | |
1065 | ||
40e0b090 PY |
1066 | trace_sk_data_ready(sk); |
1067 | ||
386e5e6e | 1068 | read_lock_bh(&sk->sk_callback_lock); |
3f2304f8 | 1069 | queue = sk->sk_user_data; |
72e5d757 SG |
1070 | if (likely(queue && queue->rd_enabled) && |
1071 | !test_bit(NVME_TCP_Q_POLLING, &queue->flags)) | |
3f2304f8 | 1072 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); |
386e5e6e | 1073 | read_unlock_bh(&sk->sk_callback_lock); |
3f2304f8 SG |
1074 | } |
1075 | ||
1076 | static void nvme_tcp_write_space(struct sock *sk) | |
1077 | { | |
1078 | struct nvme_tcp_queue *queue; | |
1079 | ||
1080 | read_lock_bh(&sk->sk_callback_lock); | |
1081 | queue = sk->sk_user_data; | |
1082 | if (likely(queue && sk_stream_is_writeable(sk))) { | |
1083 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | |
1084 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); | |
1085 | } | |
1086 | read_unlock_bh(&sk->sk_callback_lock); | |
1087 | } | |
1088 | ||
1089 | static void nvme_tcp_state_change(struct sock *sk) | |
1090 | { | |
1091 | struct nvme_tcp_queue *queue; | |
1092 | ||
8b73b45d | 1093 | read_lock_bh(&sk->sk_callback_lock); |
3f2304f8 SG |
1094 | queue = sk->sk_user_data; |
1095 | if (!queue) | |
1096 | goto done; | |
1097 | ||
1098 | switch (sk->sk_state) { | |
1099 | case TCP_CLOSE: | |
1100 | case TCP_CLOSE_WAIT: | |
1101 | case TCP_LAST_ACK: | |
1102 | case TCP_FIN_WAIT1: | |
1103 | case TCP_FIN_WAIT2: | |
3f2304f8 SG |
1104 | nvme_tcp_error_recovery(&queue->ctrl->ctrl); |
1105 | break; | |
1106 | default: | |
1107 | dev_info(queue->ctrl->ctrl.device, | |
1108 | "queue %d socket state %d\n", | |
1109 | nvme_tcp_queue_id(queue), sk->sk_state); | |
1110 | } | |
1111 | ||
1112 | queue->state_change(sk); | |
1113 | done: | |
8b73b45d | 1114 | read_unlock_bh(&sk->sk_callback_lock); |
3f2304f8 SG |
1115 | } |
1116 | ||
1117 | static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) | |
1118 | { | |
1119 | queue->request = NULL; | |
1120 | } | |
1121 | ||
1122 | static void nvme_tcp_fail_request(struct nvme_tcp_request *req) | |
1123 | { | |
63573807 SG |
1124 | if (nvme_tcp_async_req(req)) { |
1125 | union nvme_result res = {}; | |
1126 | ||
1127 | nvme_complete_async_event(&req->queue->ctrl->ctrl, | |
1128 | cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res); | |
1129 | } else { | |
1130 | nvme_tcp_end_request(blk_mq_rq_from_pdu(req), | |
1131 | NVME_SC_HOST_PATH_ERROR); | |
1132 | } | |
3f2304f8 SG |
1133 | } |
1134 | ||
1135 | static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) | |
1136 | { | |
1137 | struct nvme_tcp_queue *queue = req->queue; | |
25e1f67e | 1138 | int req_data_len = req->data_len; |
c2700d28 | 1139 | u32 h2cdata_left = req->h2cdata_left; |
3f2304f8 SG |
1140 | |
1141 | while (true) { | |
77698878 DH |
1142 | struct bio_vec bvec; |
1143 | struct msghdr msg = { | |
1144 | .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, | |
1145 | }; | |
3f2304f8 SG |
1146 | struct page *page = nvme_tcp_req_cur_page(req); |
1147 | size_t offset = nvme_tcp_req_cur_offset(req); | |
1148 | size_t len = nvme_tcp_req_cur_length(req); | |
1149 | bool last = nvme_tcp_pdu_last_send(req, len); | |
25e1f67e | 1150 | int req_data_sent = req->data_sent; |
77698878 | 1151 | int ret; |
3f2304f8 | 1152 | |
122e5b9f | 1153 | if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) |
77698878 | 1154 | msg.msg_flags |= MSG_EOR; |
3f2304f8 | 1155 | else |
77698878 | 1156 | msg.msg_flags |= MSG_MORE; |
3f2304f8 | 1157 | |
6af7331a | 1158 | if (!sendpages_ok(page, len, offset)) |
c97d3fb9 | 1159 | msg.msg_flags &= ~MSG_SPLICE_PAGES; |
77698878 DH |
1160 | |
1161 | bvec_set_page(&bvec, page, len, offset); | |
1162 | iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); | |
1163 | ret = sock_sendmsg(queue->sock, &msg); | |
3f2304f8 SG |
1164 | if (ret <= 0) |
1165 | return ret; | |
1166 | ||
3f2304f8 | 1167 | if (queue->data_digest) |
427fff9a | 1168 | nvme_tcp_ddgst_update(&queue->snd_crc, page, |
3f2304f8 SG |
1169 | offset, ret); |
1170 | ||
e371af03 SG |
1171 | /* |
1172 | * Update the request iterator, except for the last payload send |
1173 | * in the request, where we must not modify it as we may |
1174 | * race with the RX path completing the request. |
1175 | */ | |
25e1f67e | 1176 | if (req_data_sent + ret < req_data_len) |
e371af03 SG |
1177 | nvme_tcp_advance_req(req, ret); |
1178 | ||
1179 | /* fully successful last send in current PDU */ | |
3f2304f8 SG |
1180 | if (last && ret == len) { |
1181 | if (queue->data_digest) { | |
427fff9a EB |
1182 | req->ddgst = |
1183 | nvme_tcp_ddgst_final(queue->snd_crc); | |
3f2304f8 SG |
1184 | req->state = NVME_TCP_SEND_DDGST; |
1185 | req->offset = 0; | |
1186 | } else { | |
c2700d28 VP |
1187 | if (h2cdata_left) |
1188 | nvme_tcp_setup_h2c_data_pdu(req); | |
1189 | else | |
1190 | nvme_tcp_done_send_req(queue); | |
3f2304f8 SG |
1191 | } |
1192 | return 1; | |
1193 | } | |
1194 | } | |
1195 | return -EAGAIN; | |
1196 | } | |
1197 | ||
1198 | static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) | |
1199 | { | |
1200 | struct nvme_tcp_queue *queue = req->queue; | |
a3406352 | 1201 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
77698878 DH |
1202 | struct bio_vec bvec; |
1203 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; | |
3f2304f8 | 1204 | bool inline_data = nvme_tcp_has_inline_data(req); |
3f2304f8 SG |
1205 | u8 hdgst = nvme_tcp_hdgst_len(queue); |
1206 | int len = sizeof(*pdu) + hdgst - req->offset; | |
1207 | int ret; | |
1208 | ||
122e5b9f | 1209 | if (inline_data || nvme_tcp_queue_more(queue)) |
77698878 | 1210 | msg.msg_flags |= MSG_MORE; |
5bb052d7 | 1211 | else |
77698878 | 1212 | msg.msg_flags |= MSG_EOR; |
5bb052d7 | 1213 | |
3f2304f8 | 1214 | if (queue->hdr_digest && !req->offset) |
427fff9a | 1215 | nvme_tcp_set_hdgst(pdu, sizeof(*pdu)); |
3f2304f8 | 1216 | |
77698878 DH |
1217 | bvec_set_virt(&bvec, (void *)pdu + req->offset, len); |
1218 | iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); | |
1219 | ret = sock_sendmsg(queue->sock, &msg); | |
3f2304f8 SG |
1220 | if (unlikely(ret <= 0)) |
1221 | return ret; | |
1222 | ||
1223 | len -= ret; | |
1224 | if (!len) { | |
1225 | if (inline_data) { | |
1226 | req->state = NVME_TCP_SEND_DATA; | |
1227 | if (queue->data_digest) | |
427fff9a | 1228 | queue->snd_crc = NVME_TCP_CRC_SEED; |
3f2304f8 SG |
1229 | } else { |
1230 | nvme_tcp_done_send_req(queue); | |
1231 | } | |
1232 | return 1; | |
1233 | } | |
1234 | req->offset += ret; | |
1235 | ||
1236 | return -EAGAIN; | |
1237 | } | |
1238 | ||
1239 | static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) | |
1240 | { | |
1241 | struct nvme_tcp_queue *queue = req->queue; | |
a3406352 | 1242 | struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req); |
77698878 DH |
1243 | struct bio_vec bvec; |
1244 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, }; | |
3f2304f8 SG |
1245 | u8 hdgst = nvme_tcp_hdgst_len(queue); |
1246 | int len = sizeof(*pdu) - req->offset + hdgst; | |
1247 | int ret; | |
1248 | ||
1249 | if (queue->hdr_digest && !req->offset) | |
427fff9a | 1250 | nvme_tcp_set_hdgst(pdu, sizeof(*pdu)); |
3f2304f8 | 1251 | |
c2700d28 | 1252 | if (!req->h2cdata_left) |
77698878 DH |
1253 | msg.msg_flags |= MSG_SPLICE_PAGES; |
1254 | ||
1255 | bvec_set_virt(&bvec, (void *)pdu + req->offset, len); | |
1256 | iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); | |
1257 | ret = sock_sendmsg(queue->sock, &msg); | |
3f2304f8 SG |
1258 | if (unlikely(ret <= 0)) |
1259 | return ret; | |
1260 | ||
1261 | len -= ret; | |
1262 | if (!len) { | |
1263 | req->state = NVME_TCP_SEND_DATA; | |
1264 | if (queue->data_digest) | |
427fff9a | 1265 | queue->snd_crc = NVME_TCP_CRC_SEED; |
3f2304f8 SG |
1266 | return 1; |
1267 | } | |
1268 | req->offset += ret; | |
1269 | ||
1270 | return -EAGAIN; | |
1271 | } | |
1272 | ||
1273 | static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) | |
1274 | { | |
1275 | struct nvme_tcp_queue *queue = req->queue; | |
ce7723e9 | 1276 | size_t offset = req->offset; |
c2700d28 | 1277 | u32 h2cdata_left = req->h2cdata_left; |
3f2304f8 | 1278 | int ret; |
122e5b9f | 1279 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; |
3f2304f8 | 1280 | struct kvec iov = { |
d89b9f3b | 1281 | .iov_base = (u8 *)&req->ddgst + req->offset, |
3f2304f8 SG |
1282 | .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset |
1283 | }; | |
1284 | ||
122e5b9f SG |
1285 | if (nvme_tcp_queue_more(queue)) |
1286 | msg.msg_flags |= MSG_MORE; | |
1287 | else | |
1288 | msg.msg_flags |= MSG_EOR; | |
1289 | ||
3f2304f8 SG |
1290 | ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); |
1291 | if (unlikely(ret <= 0)) | |
1292 | return ret; | |
1293 | ||
ce7723e9 | 1294 | if (offset + ret == NVME_TCP_DIGEST_LENGTH) { |
c2700d28 VP |
1295 | if (h2cdata_left) |
1296 | nvme_tcp_setup_h2c_data_pdu(req); | |
1297 | else | |
1298 | nvme_tcp_done_send_req(queue); | |
3f2304f8 SG |
1299 | return 1; |
1300 | } | |
1301 | ||
1302 | req->offset += ret; | |
1303 | return -EAGAIN; | |
1304 | } | |
1305 | ||
1306 | static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) | |
1307 | { | |
1308 | struct nvme_tcp_request *req; | |
83e1226b | 1309 | unsigned int noreclaim_flag; |
3f2304f8 SG |
1310 | int ret = 1; |
1311 | ||
1312 | if (!queue->request) { | |
1313 | queue->request = nvme_tcp_fetch_request(queue); | |
1314 | if (!queue->request) | |
1315 | return 0; | |
1316 | } | |
1317 | req = queue->request; | |
1318 | ||
83e1226b | 1319 | noreclaim_flag = memalloc_noreclaim_save(); |
3f2304f8 SG |
1320 | if (req->state == NVME_TCP_SEND_CMD_PDU) { |
1321 | ret = nvme_tcp_try_send_cmd_pdu(req); | |
1322 | if (ret <= 0) | |
1323 | goto done; | |
1324 | if (!nvme_tcp_has_inline_data(req)) | |
83e1226b | 1325 | goto out; |
3f2304f8 SG |
1326 | } |
1327 | ||
1328 | if (req->state == NVME_TCP_SEND_H2C_PDU) { | |
1329 | ret = nvme_tcp_try_send_data_pdu(req); | |
1330 | if (ret <= 0) | |
1331 | goto done; | |
1332 | } | |
1333 | ||
1334 | if (req->state == NVME_TCP_SEND_DATA) { | |
1335 | ret = nvme_tcp_try_send_data(req); | |
1336 | if (ret <= 0) | |
1337 | goto done; | |
1338 | } | |
1339 | ||
1340 | if (req->state == NVME_TCP_SEND_DDGST) | |
1341 | ret = nvme_tcp_try_send_ddgst(req); | |
1342 | done: | |
5ff4e112 | 1343 | if (ret == -EAGAIN) { |
3f2304f8 | 1344 | ret = 0; |
5ff4e112 SG |
1345 | } else if (ret < 0) { |
1346 | dev_err(queue->ctrl->ctrl.device, | |
1347 | "failed to send request %d\n", ret); | |
41d07df7 | 1348 | nvme_tcp_fail_request(queue->request); |
5ff4e112 SG |
1349 | nvme_tcp_done_send_req(queue); |
1350 | } | |
83e1226b SG |
1351 | out: |
1352 | memalloc_noreclaim_restore(noreclaim_flag); | |
3f2304f8 SG |
1353 | return ret; |
1354 | } | |
1355 | ||
1356 | static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue) | |
1357 | { | |
10407ec9 PBT |
1358 | struct socket *sock = queue->sock; |
1359 | struct sock *sk = sock->sk; | |
3f2304f8 SG |
1360 | read_descriptor_t rd_desc; |
1361 | int consumed; | |
1362 | ||
1363 | rd_desc.arg.data = queue; | |
1364 | rd_desc.count = 1; | |
1365 | lock_sock(sk); | |
1a9460ce | 1366 | queue->nr_cqe = 0; |
10407ec9 | 1367 | consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb); |
3f2304f8 | 1368 | release_sock(sk); |
f42d4796 | 1369 | return consumed == -EAGAIN ? 0 : consumed; |
3f2304f8 SG |
1370 | } |
1371 | ||
1372 | static void nvme_tcp_io_work(struct work_struct *w) | |
1373 | { | |
1374 | struct nvme_tcp_queue *queue = | |
1375 | container_of(w, struct nvme_tcp_queue, io_work); | |
ddef2957 | 1376 | unsigned long deadline = jiffies + msecs_to_jiffies(1); |
3f2304f8 SG |
1377 | |
1378 | do { | |
1379 | bool pending = false; | |
1380 | int result; | |
1381 | ||
db5ad6b7 SG |
1382 | if (mutex_trylock(&queue->send_mutex)) { |
1383 | result = nvme_tcp_try_send(queue); | |
1384 | mutex_unlock(&queue->send_mutex); | |
1385 | if (result > 0) | |
1386 | pending = true; | |
1387 | else if (unlikely(result < 0)) | |
1388 | break; | |
70f437fb | 1389 | } |
3f2304f8 SG |
1390 | |
1391 | result = nvme_tcp_try_recv(queue); | |
1392 | if (result > 0) | |
1393 | pending = true; | |
761ad26c | 1394 | else if (unlikely(result < 0)) |
39d06079 | 1395 | return; |
3f2304f8 | 1396 | |
f42d4796 HR |
1397 | /* did we get some space after spending time in recv? */ |
1398 | if (nvme_tcp_queue_has_pending(queue) && | |
1399 | sk_stream_is_writeable(queue->sock->sk)) | |
1400 | pending = true; | |
1401 | ||
160f3549 | 1402 | if (!pending || !queue->rd_enabled) |
3f2304f8 SG |
1403 | return; |
1404 | ||
ddef2957 | 1405 | } while (!time_after(jiffies, deadline)); /* quota is exhausted */ |
3f2304f8 SG |
1406 | |
1407 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); | |
1408 | } | |
1409 | ||
3f2304f8 SG |
1410 | static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl) |
1411 | { | |
1412 | struct nvme_tcp_request *async = &ctrl->async_req; | |
1413 | ||
1414 | page_frag_free(async->pdu); | |
1415 | } | |
1416 | ||
1417 | static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) | |
1418 | { | |
1419 | struct nvme_tcp_queue *queue = &ctrl->queues[0]; | |
1420 | struct nvme_tcp_request *async = &ctrl->async_req; | |
1421 | u8 hdgst = nvme_tcp_hdgst_len(queue); | |
1422 | ||
1423 | async->pdu = page_frag_alloc(&queue->pf_cache, | |
1424 | sizeof(struct nvme_tcp_cmd_pdu) + hdgst, | |
1425 | GFP_KERNEL | __GFP_ZERO); | |
1426 | if (!async->pdu) | |
1427 | return -ENOMEM; | |
1428 | ||
1429 | async->queue = &ctrl->queues[0]; | |
1430 | return 0; | |
1431 | } | |
1432 | ||
1433 | static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) | |
1434 | { | |
1435 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
1436 | struct nvme_tcp_queue *queue = &ctrl->queues[qid]; | |
83e1226b | 1437 | unsigned int noreclaim_flag; |
3f2304f8 SG |
1438 | |
1439 | if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) | |
1440 | return; | |
1441 | ||
a0727489 | 1442 | page_frag_cache_drain(&queue->pf_cache); |
83e1226b SG |
1443 | |
1444 | noreclaim_flag = memalloc_noreclaim_save(); | |
e40d4eb8 HR |
1445 | /* ->sock will be released by fput() */ |
1446 | fput(queue->sock->file); | |
1447 | queue->sock = NULL; | |
83e1226b SG |
1448 | memalloc_noreclaim_restore(noreclaim_flag); |
1449 | ||
3f2304f8 | 1450 | kfree(queue->pdu); |
d48f92cd | 1451 | mutex_destroy(&queue->send_mutex); |
9ebbfe49 | 1452 | mutex_destroy(&queue->queue_lock); |
3f2304f8 SG |
1453 | } |
1454 | ||
1455 | static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) | |
1456 | { | |
1457 | struct nvme_tcp_icreq_pdu *icreq; | |
1458 | struct nvme_tcp_icresp_pdu *icresp; | |
2837966a HR |
1459 | char cbuf[CMSG_LEN(sizeof(char))] = {}; |
1460 | u8 ctype; | |
3f2304f8 SG |
1461 | struct msghdr msg = {}; |
1462 | struct kvec iov; | |
1463 | bool ctrl_hdgst, ctrl_ddgst; | |
c2700d28 | 1464 | u32 maxh2cdata; |
3f2304f8 SG |
1465 | int ret; |
1466 | ||
1467 | icreq = kzalloc(sizeof(*icreq), GFP_KERNEL); | |
1468 | if (!icreq) | |
1469 | return -ENOMEM; | |
1470 | ||
1471 | icresp = kzalloc(sizeof(*icresp), GFP_KERNEL); | |
1472 | if (!icresp) { | |
1473 | ret = -ENOMEM; | |
1474 | goto free_icreq; | |
1475 | } | |
1476 | ||
1477 | icreq->hdr.type = nvme_tcp_icreq; | |
1478 | icreq->hdr.hlen = sizeof(*icreq); | |
1479 | icreq->hdr.pdo = 0; | |
1480 | icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen); | |
1481 | icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); | |
1482 | icreq->maxr2t = 0; /* single inflight r2t supported */ | |
1483 | icreq->hpda = 0; /* no alignment constraint */ | |
1484 | if (queue->hdr_digest) | |
1485 | icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE; | |
1486 | if (queue->data_digest) | |
1487 | icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE; | |
1488 | ||
1489 | iov.iov_base = icreq; | |
1490 | iov.iov_len = sizeof(*icreq); | |
1491 | ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); | |
e4f4aabb HR |
1492 | if (ret < 0) { |
1493 | pr_warn("queue %d: failed to send icreq, error %d\n", | |
1494 | nvme_tcp_queue_id(queue), ret); | |
3f2304f8 | 1495 | goto free_icresp; |
e4f4aabb | 1496 | } |
3f2304f8 SG |
1497 | |
1498 | memset(&msg, 0, sizeof(msg)); | |
1499 | iov.iov_base = icresp; | |
1500 | iov.iov_len = sizeof(*icresp); | |
36389576 | 1501 | if (nvme_tcp_queue_tls(queue)) { |
2837966a HR |
1502 | msg.msg_control = cbuf; |
1503 | msg.msg_controllen = sizeof(cbuf); | |
1504 | } | |
578539e0 | 1505 | msg.msg_flags = MSG_WAITALL; |
3f2304f8 SG |
1506 | ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, |
1507 | iov.iov_len, msg.msg_flags); | |
528361c4 DC |
1508 | if (ret >= 0 && ret < sizeof(*icresp)) |
1509 | ret = -ECONNRESET; | |
1510 | if (ret < 0) { | |
e4f4aabb HR |
1511 | pr_warn("queue %d: failed to receive icresp, error %d\n", |
1512 | nvme_tcp_queue_id(queue), ret); | |
3f2304f8 | 1513 | goto free_icresp; |
e4f4aabb | 1514 | } |
0e32fdd7 | 1515 | ret = -ENOTCONN; |
36389576 | 1516 | if (nvme_tcp_queue_tls(queue)) { |
2837966a HR |
1517 | ctype = tls_get_record_type(queue->sock->sk, |
1518 | (struct cmsghdr *)cbuf); | |
1519 | if (ctype != TLS_RECORD_TYPE_DATA) { | |
1520 | pr_err("queue %d: unhandled TLS record %d\n", | |
1521 | nvme_tcp_queue_id(queue), ctype); | |
0e32fdd7 | 1522 | goto free_icresp; |
2837966a HR |
1523 | } |
1524 | } | |
3f2304f8 SG |
1525 | ret = -EINVAL; |
1526 | if (icresp->hdr.type != nvme_tcp_icresp) { | |
1527 | pr_err("queue %d: bad type returned %d\n", | |
1528 | nvme_tcp_queue_id(queue), icresp->hdr.type); | |
1529 | goto free_icresp; | |
1530 | } | |
1531 | ||
1532 | if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) { | |
1533 | pr_err("queue %d: bad pdu length returned %d\n", | |
1534 | nvme_tcp_queue_id(queue), icresp->hdr.plen); | |
1535 | goto free_icresp; | |
1536 | } | |
1537 | ||
1538 | if (icresp->pfv != NVME_TCP_PFV_1_0) { | |
1539 | pr_err("queue %d: bad pfv returned %d\n", | |
1540 | nvme_tcp_queue_id(queue), icresp->pfv); | |
1541 | goto free_icresp; | |
1542 | } | |
1543 | ||
1544 | ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE); | |
1545 | if ((queue->data_digest && !ctrl_ddgst) || | |
1546 | (!queue->data_digest && ctrl_ddgst)) { | |
1547 | pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n", | |
1548 | nvme_tcp_queue_id(queue), | |
1549 | queue->data_digest ? "enabled" : "disabled", | |
1550 | ctrl_ddgst ? "enabled" : "disabled"); | |
1551 | goto free_icresp; | |
1552 | } | |
1553 | ||
1554 | ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE); | |
1555 | if ((queue->hdr_digest && !ctrl_hdgst) || | |
1556 | (!queue->hdr_digest && ctrl_hdgst)) { | |
1557 | pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n", | |
1558 | nvme_tcp_queue_id(queue), | |
1559 | queue->hdr_digest ? "enabled" : "disabled", | |
1560 | ctrl_hdgst ? "enabled" : "disabled"); | |
1561 | goto free_icresp; | |
1562 | } | |
1563 | ||
1564 | if (icresp->cpda != 0) { | |
1565 | pr_err("queue %d: unsupported cpda returned %d\n", | |
1566 | nvme_tcp_queue_id(queue), icresp->cpda); | |
1567 | goto free_icresp; | |
1568 | } | |
1569 | ||
c2700d28 VP |
1570 | maxh2cdata = le32_to_cpu(icresp->maxdata); |
1571 | if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) { | |
1572 | pr_err("queue %d: invalid maxh2cdata returned %u\n", | |
1573 | nvme_tcp_queue_id(queue), maxh2cdata); | |
1574 | goto free_icresp; | |
1575 | } | |
1576 | queue->maxh2cdata = maxh2cdata; | |
1577 | ||
3f2304f8 SG |
1578 | ret = 0; |
1579 | free_icresp: | |
1580 | kfree(icresp); | |
1581 | free_icreq: | |
1582 | kfree(icreq); | |
1583 | return ret; | |
1584 | } | |
1585 | ||
40510a63 SG |
1586 | static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) |
1587 | { | |
1588 | return nvme_tcp_queue_id(queue) == 0; | |
1589 | } | |
1590 | ||
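/*
 * Queue id 0 is the admin queue. The remaining ids are carved up, in order,
 * into default (read/write), read-only and poll queues according to
 * ctrl->io_queues[]; the helpers below test which range a given queue id
 * falls into.
 */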
1591 | static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) | |
1592 | { | |
1593 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1594 | int qid = nvme_tcp_queue_id(queue); | |
1595 | ||
1596 | return !nvme_tcp_admin_queue(queue) && | |
1597 | qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; | |
1598 | } | |
1599 | ||
1600 | static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) | |
1601 | { | |
1602 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1603 | int qid = nvme_tcp_queue_id(queue); | |
1604 | ||
1605 | return !nvme_tcp_admin_queue(queue) && | |
1606 | !nvme_tcp_default_queue(queue) && | |
1607 | qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + | |
1608 | ctrl->io_queues[HCTX_TYPE_READ]; | |
1609 | } | |
1610 | ||
1611 | static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) | |
1612 | { | |
1613 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1614 | int qid = nvme_tcp_queue_id(queue); | |
1615 | ||
1616 | return !nvme_tcp_admin_queue(queue) && | |
1617 | !nvme_tcp_default_queue(queue) && | |
1618 | !nvme_tcp_read_queue(queue) && | |
1619 | qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + | |
1620 | ctrl->io_queues[HCTX_TYPE_READ] + | |
1621 | ctrl->io_queues[HCTX_TYPE_POLL]; | |
1622 | } | |
1623 | ||
cd513e04 | 1624 | /* |
32193789 SG |
1625 | * Track the number of queues assigned to each cpu using a global per-cpu |
1626 | * counter and select the least used cpu from the mq_map. Our goal is to spread | |
1627 | * different controllers' I/O threads across different cpu cores. |
1628 | * | |
1629 | * Note that the accounting is not 100% perfect, but it doesn't need to be; we |
1630 | * simply make a best-effort selection of the least used candidate cpu core |
1631 | * available at any given point. |
1632 | */ | |
40510a63 SG |
1633 | static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) |
1634 | { | |
1635 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
32193789 SG |
1636 | struct blk_mq_tag_set *set = &ctrl->tag_set; |
1637 | int qid = nvme_tcp_queue_id(queue) - 1; | |
1638 | unsigned int *mq_map = NULL; | |
1639 | int cpu, min_queues = INT_MAX, io_cpu; | |
1640 | ||
1641 | if (wq_unbound) | |
1642 | goto out; | |
40510a63 SG |
1643 | |
1644 | if (nvme_tcp_default_queue(queue)) | |
32193789 | 1645 | mq_map = set->map[HCTX_TYPE_DEFAULT].mq_map; |
40510a63 | 1646 | else if (nvme_tcp_read_queue(queue)) |
32193789 | 1647 | mq_map = set->map[HCTX_TYPE_READ].mq_map; |
40510a63 | 1648 | else if (nvme_tcp_poll_queue(queue)) |
32193789 SG |
1649 | mq_map = set->map[HCTX_TYPE_POLL].mq_map; |
1650 | ||
1651 | if (WARN_ON(!mq_map)) | |
1652 | goto out; | |
1653 | ||
1654 | /* Search for the least used cpu from the mq_map */ | |
1655 | io_cpu = WORK_CPU_UNBOUND; | |
1656 | for_each_online_cpu(cpu) { | |
1657 | int num_queues = atomic_read(&nvme_tcp_cpu_queues[cpu]); | |
1658 | ||
1659 | if (mq_map[cpu] != qid) | |
1660 | continue; | |
1661 | if (num_queues < min_queues) { | |
1662 | io_cpu = cpu; | |
1663 | min_queues = num_queues; | |
1664 | } | |
1665 | } | |
1666 | if (io_cpu != WORK_CPU_UNBOUND) { | |
1667 | queue->io_cpu = io_cpu; | |
1668 | atomic_inc(&nvme_tcp_cpu_queues[io_cpu]); | |
1669 | set_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags); | |
1670 | } | |
1671 | out: | |
1672 | dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n", | |
1673 | qid, queue->io_cpu); | |
40510a63 SG |
1674 | } |
1675 | ||
be8e82ca HR |
1676 | static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) |
1677 | { | |
1678 | struct nvme_tcp_queue *queue = data; | |
1679 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1680 | int qid = nvme_tcp_queue_id(queue); | |
1681 | struct key *tls_key; | |
1682 | ||
1683 | dev_dbg(ctrl->ctrl.device, "queue %d: TLS handshake done, key %x, status %d\n", | |
1684 | qid, pskid, status); | |
1685 | ||
1686 | if (status) { | |
1687 | queue->tls_err = -status; | |
1688 | goto out_complete; | |
1689 | } | |
1690 | ||
5bc46b49 | 1691 | tls_key = nvme_tls_key_lookup(pskid); |
be8e82ca HR |
1692 | if (IS_ERR(tls_key)) { |
1693 | dev_warn(ctrl->ctrl.device, "queue %d: Invalid key %x\n", | |
1694 | qid, pskid); | |
1695 | queue->tls_err = -ENOKEY; | |
1696 | } else { | |
36389576 HR |
1697 | queue->tls_enabled = true; |
1698 | if (qid == 0) | |
1699 | ctrl->ctrl.tls_pskid = key_serial(tls_key); | |
1700 | key_put(tls_key); | |
be8e82ca HR |
1701 | queue->tls_err = 0; |
1702 | } | |
1703 | ||
1704 | out_complete: | |
1705 | complete(&queue->tls_complete); | |
1706 | } | |
1707 | ||
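/*
 * Hand the connected socket to the kernel TLS handshake infrastructure
 * using the given PSK (and, if configured, a caller-supplied keyring), then
 * wait for nvme_tcp_tls_done() to record the result in queue->tls_err. The
 * wait is bounded by tls_handshake_timeout and cancelled on expiry.
 */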
1708 | static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl, | |
1709 | struct nvme_tcp_queue *queue, | |
1710 | key_serial_t pskid) | |
1711 | { | |
1712 | int qid = nvme_tcp_queue_id(queue); | |
1713 | int ret; | |
1714 | struct tls_handshake_args args; | |
1715 | unsigned long tmo = tls_handshake_timeout * HZ; | |
1716 | key_serial_t keyring = nvme_keyring_id(); | |
1717 | ||
1718 | dev_dbg(nctrl->device, "queue %d: start TLS with key %x\n", | |
1719 | qid, pskid); | |
1720 | memset(&args, 0, sizeof(args)); | |
1721 | args.ta_sock = queue->sock; | |
1722 | args.ta_done = nvme_tcp_tls_done; | |
1723 | args.ta_data = queue; | |
1724 | args.ta_my_peerids[0] = pskid; | |
1725 | args.ta_num_peerids = 1; | |
adf22c52 HR |
1726 | if (nctrl->opts->keyring) |
1727 | keyring = key_serial(nctrl->opts->keyring); | |
be8e82ca HR |
1728 | args.ta_keyring = keyring; |
1729 | args.ta_timeout_ms = tls_handshake_timeout * 1000; | |
1730 | queue->tls_err = -EOPNOTSUPP; | |
1731 | init_completion(&queue->tls_complete); | |
1732 | ret = tls_client_hello_psk(&args, GFP_KERNEL); | |
1733 | if (ret) { | |
1734 | dev_err(nctrl->device, "queue %d: failed to start TLS: %d\n", | |
1735 | qid, ret); | |
1736 | return ret; | |
1737 | } | |
1738 | ret = wait_for_completion_interruptible_timeout(&queue->tls_complete, tmo); | |
1739 | if (ret <= 0) { | |
1740 | if (ret == 0) | |
1741 | ret = -ETIMEDOUT; | |
1742 | ||
1743 | dev_err(nctrl->device, | |
1744 | "queue %d: TLS handshake failed, error %d\n", | |
1745 | qid, ret); | |
1746 | tls_handshake_cancel(queue->sock->sk); | |
1747 | } else { | |
1748 | dev_dbg(nctrl->device, | |
1749 | "queue %d: TLS handshake complete, error %d\n", | |
1750 | qid, queue->tls_err); | |
1751 | ret = queue->tls_err; | |
1752 | } | |
1753 | return ret; | |
1754 | } | |
be8e82ca HR |
1755 | |
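/*
 * Allocate one queue: create and tune the kernel TCP socket, optionally
 * bind it to a source address or interface, connect to the target, run the
 * TLS handshake if a PSK was supplied, and finish with the ICReq/ICResp
 * exchange in nvme_tcp_init_connection().
 */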
1756 | static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, | |
1757 | key_serial_t pskid) | |
3f2304f8 SG |
1758 | { |
1759 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
1760 | struct nvme_tcp_queue *queue = &ctrl->queues[qid]; | |
6ebf71ba | 1761 | int ret, rcv_pdu_size; |
e40d4eb8 | 1762 | struct file *sock_file; |
3f2304f8 | 1763 | |
9ebbfe49 | 1764 | mutex_init(&queue->queue_lock); |
3f2304f8 | 1765 | queue->ctrl = ctrl; |
15ec928a | 1766 | init_llist_head(&queue->req_list); |
3f2304f8 | 1767 | INIT_LIST_HEAD(&queue->send_list); |
db5ad6b7 | 1768 | mutex_init(&queue->send_mutex); |
3f2304f8 | 1769 | INIT_WORK(&queue->io_work, nvme_tcp_io_work); |
3f2304f8 SG |
1770 | |
1771 | if (qid > 0) | |
9924b030 | 1772 | queue->cmnd_capsule_len = nctrl->ioccsz * 16; |
3f2304f8 SG |
1773 | else |
1774 | queue->cmnd_capsule_len = sizeof(struct nvme_command) + | |
1775 | NVME_TCP_ADMIN_CCSZ; | |
1776 | ||
1be52169 PS |
1777 | ret = sock_create_kern(current->nsproxy->net_ns, |
1778 | ctrl->addr.ss_family, SOCK_STREAM, | |
3f2304f8 SG |
1779 | IPPROTO_TCP, &queue->sock); |
1780 | if (ret) { | |
9924b030 | 1781 | dev_err(nctrl->device, |
3f2304f8 | 1782 | "failed to create socket: %d\n", ret); |
9ebbfe49 | 1783 | goto err_destroy_mutex; |
3f2304f8 SG |
1784 | } |
1785 | ||
e40d4eb8 HR |
1786 | sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL); |
1787 | if (IS_ERR(sock_file)) { | |
1788 | ret = PTR_ERR(sock_file); | |
1789 | goto err_destroy_mutex; | |
1790 | } | |
b013b817 KI |
1791 | |
1792 | sk_net_refcnt_upgrade(queue->sock->sk); | |
841aee4d CL |
1793 | nvme_tcp_reclassify_socket(queue->sock); |
1794 | ||
3f2304f8 | 1795 | /* Single syn retry */ |
557eadfc | 1796 | tcp_sock_set_syncnt(queue->sock->sk, 1); |
3f2304f8 SG |
1797 | |
1798 | /* Set TCP no delay */ | |
12abc5ee | 1799 | tcp_sock_set_nodelay(queue->sock->sk); |
3f2304f8 SG |
1800 | |
1801 | /* | |
1802 | * Clean up whatever is sitting in the TCP transmit queue on socket |
1803 | * close. This is done to prevent stale data from being sent should | |
1804 | * the network connection be restored before TCP times out. | |
1805 | */ | |
c433594c | 1806 | sock_no_linger(queue->sock->sk); |
3f2304f8 | 1807 | |
6e434967 CH |
1808 | if (so_priority > 0) |
1809 | sock_set_priority(queue->sock->sk, so_priority); | |
9912ade3 | 1810 | |
bb13985d | 1811 | /* Set socket type of service */ |
6ebf71ba CH |
1812 | if (nctrl->opts->tos >= 0) |
1813 | ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos); | |
bb13985d | 1814 | |
adc99fd3 SG |
1815 | /* Set a 10 second timeout for the icresp recvmsg */ |
1816 | queue->sock->sk->sk_rcvtimeo = 10 * HZ; | |
1817 | ||
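/*
 * Keep socket memory allocations from blocking: use atomic allocations for
 * the socket itself and, via sk_set_memalloc() below, allow it to dip into
 * memory reserves so that I/O can still make forward progress under memory
 * pressure (e.g. when swapping over nvme-tcp).
 */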
3f2304f8 | 1818 | queue->sock->sk->sk_allocation = GFP_ATOMIC; |
98123866 | 1819 | queue->sock->sk->sk_use_task_frag = false; |
32193789 | 1820 | queue->io_cpu = WORK_CPU_UNBOUND; |
3f2304f8 SG |
1821 | queue->request = NULL; |
1822 | queue->data_remaining = 0; | |
1823 | queue->ddgst_remaining = 0; | |
1824 | queue->pdu_remaining = 0; | |
1825 | queue->pdu_offset = 0; | |
1826 | sk_set_memalloc(queue->sock->sk); | |
1827 | ||
9924b030 | 1828 | if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) { |
3f2304f8 SG |
1829 | ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr, |
1830 | sizeof(ctrl->src_addr)); | |
1831 | if (ret) { | |
9924b030 | 1832 | dev_err(nctrl->device, |
3f2304f8 SG |
1833 | "failed to bind queue %d socket %d\n", |
1834 | qid, ret); | |
1835 | goto err_sock; | |
1836 | } | |
1837 | } | |
1838 | ||
3ede8f72 MB |
1839 | if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) { |
1840 | char *iface = nctrl->opts->host_iface; | |
1841 | sockptr_t optval = KERNEL_SOCKPTR(iface); | |
1842 | ||
1843 | ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE, | |
1844 | optval, strlen(iface)); | |
1845 | if (ret) { | |
1846 | dev_err(nctrl->device, | |
1847 | "failed to bind to interface %s queue %d err %d\n", | |
1848 | iface, qid, ret); | |
1849 | goto err_sock; | |
1850 | } | |
1851 | } | |
1852 | ||
3f2304f8 SG |
1853 | queue->hdr_digest = nctrl->opts->hdr_digest; |
1854 | queue->data_digest = nctrl->opts->data_digest; | |
3f2304f8 SG |
1855 | |
1856 | rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) + | |
1857 | nvme_tcp_hdgst_len(queue); | |
1858 | queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL); | |
1859 | if (!queue->pdu) { | |
1860 | ret = -ENOMEM; | |
427fff9a | 1861 | goto err_sock; |
3f2304f8 SG |
1862 | } |
1863 | ||
9924b030 | 1864 | dev_dbg(nctrl->device, "connecting queue %d\n", |
3f2304f8 SG |
1865 | nvme_tcp_queue_id(queue)); |
1866 | ||
1867 | ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr, | |
1868 | sizeof(ctrl->addr), 0); | |
1869 | if (ret) { | |
9924b030 | 1870 | dev_err(nctrl->device, |
3f2304f8 SG |
1871 | "failed to connect socket: %d\n", ret); |
1872 | goto err_rcv_pdu; | |
1873 | } | |
1874 | ||
be8e82ca | 1875 | /* If PSKs are configured try to start TLS */ |
36389576 | 1876 | if (nvme_tcp_tls_configured(nctrl) && pskid) { |
be8e82ca HR |
1877 | ret = nvme_tcp_start_tls(nctrl, queue, pskid); |
1878 | if (ret) | |
1879 | goto err_init_connect; | |
1880 | } | |
1881 | ||
3f2304f8 SG |
1882 | ret = nvme_tcp_init_connection(queue); |
1883 | if (ret) | |
1884 | goto err_init_connect; | |
1885 | ||
3f2304f8 | 1886 | set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); |
3f2304f8 SG |
1887 | |
1888 | return 0; | |
1889 | ||
1890 | err_init_connect: | |
1891 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
1892 | err_rcv_pdu: | |
1893 | kfree(queue->pdu); | |
3f2304f8 | 1894 | err_sock: |
e40d4eb8 HR |
1895 | /* ->sock will be released by fput() */ |
1896 | fput(queue->sock->file); | |
3f2304f8 | 1897 | queue->sock = NULL; |
9ebbfe49 | 1898 | err_destroy_mutex: |
d48f92cd | 1899 | mutex_destroy(&queue->send_mutex); |
9ebbfe49 | 1900 | mutex_destroy(&queue->queue_lock); |
3f2304f8 SG |
1901 | return ret; |
1902 | } | |
1903 | ||
88eaba80 | 1904 | static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue) |
3f2304f8 SG |
1905 | { |
1906 | struct socket *sock = queue->sock; | |
1907 | ||
1908 | write_lock_bh(&sock->sk->sk_callback_lock); | |
1909 | sock->sk->sk_user_data = NULL; | |
1910 | sock->sk->sk_data_ready = queue->data_ready; | |
1911 | sock->sk->sk_state_change = queue->state_change; | |
1912 | sock->sk->sk_write_space = queue->write_space; | |
1913 | write_unlock_bh(&sock->sk->sk_callback_lock); | |
1914 | } | |
1915 | ||
1916 | static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) | |
1917 | { | |
1918 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
88eaba80 | 1919 | nvme_tcp_restore_sock_ops(queue); |
3f2304f8 SG |
1920 | cancel_work_sync(&queue->io_work); |
1921 | } | |
1922 | ||
77e40bbc | 1923 | static void nvme_tcp_stop_queue_nowait(struct nvme_ctrl *nctrl, int qid) |
3f2304f8 SG |
1924 | { |
1925 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
1926 | struct nvme_tcp_queue *queue = &ctrl->queues[qid]; | |
1927 | ||
2bff487f ML |
1928 | if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) |
1929 | return; | |
1930 | ||
32193789 SG |
1931 | if (test_and_clear_bit(NVME_TCP_Q_IO_CPU_SET, &queue->flags)) |
1932 | atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]); | |
1933 | ||
9ebbfe49 CL |
1934 | mutex_lock(&queue->queue_lock); |
1935 | if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) | |
1936 | __nvme_tcp_stop_queue(queue); | |
36389576 HR |
1937 | /* Stopping the queue will disable TLS */ |
1938 | queue->tls_enabled = false; | |
9ebbfe49 | 1939 | mutex_unlock(&queue->queue_lock); |
3f2304f8 SG |
1940 | } |
1941 | ||
77e40bbc ML |
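/*
 * After a queue has been shut down, give the TCP stack up to ~100ms
 * (polling in 2ms steps) to release the data still accounted to the socket
 * write queue before teardown continues.
 */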
1942 | static void nvme_tcp_wait_queue(struct nvme_ctrl *nctrl, int qid) |
1943 | { | |
1944 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
1945 | struct nvme_tcp_queue *queue = &ctrl->queues[qid]; | |
1946 | int timeout = 100; | |
1947 | ||
1948 | while (timeout > 0) { | |
1949 | if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags) || | |
1950 | !sk_wmem_alloc_get(queue->sock->sk)) | |
1951 | return; | |
1952 | msleep(2); | |
1953 | timeout -= 2; | |
1954 | } | |
1955 | dev_warn(nctrl->device, | |
1956 | "qid %d: timeout draining sock wmem allocation expired\n", | |
1957 | qid); | |
1958 | } | |
1959 | ||
1960 | static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) | |
1961 | { | |
1962 | nvme_tcp_stop_queue_nowait(nctrl, qid); | |
1963 | nvme_tcp_wait_queue(nctrl, qid); | |
1964 | } | |
1965 | ||
1966 | ||
88eaba80 SG |
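/*
 * Save the socket's original data_ready/state_change/write_space callbacks
 * and install the nvme-tcp ones; nvme_tcp_restore_sock_ops() puts the
 * originals back when the queue is stopped.
 */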
1967 | static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) |
1968 | { | |
1969 | write_lock_bh(&queue->sock->sk->sk_callback_lock); | |
1970 | queue->sock->sk->sk_user_data = queue; | |
1971 | queue->state_change = queue->sock->sk->sk_state_change; | |
1972 | queue->data_ready = queue->sock->sk->sk_data_ready; | |
1973 | queue->write_space = queue->sock->sk->sk_write_space; | |
1974 | queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; | |
1975 | queue->sock->sk->sk_state_change = nvme_tcp_state_change; | |
1976 | queue->sock->sk->sk_write_space = nvme_tcp_write_space; | |
1977 | #ifdef CONFIG_NET_RX_BUSY_POLL | |
1978 | queue->sock->sk->sk_ll_usec = 1; | |
1979 | #endif | |
1980 | write_unlock_bh(&queue->sock->sk->sk_callback_lock); | |
1981 | } | |
1982 | ||
3f2304f8 SG |
1983 | static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) |
1984 | { | |
1985 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
88eaba80 | 1986 | struct nvme_tcp_queue *queue = &ctrl->queues[idx]; |
3f2304f8 SG |
1987 | int ret; |
1988 | ||
88eaba80 SG |
1989 | queue->rd_enabled = true; |
1990 | nvme_tcp_init_recv_ctx(queue); | |
1991 | nvme_tcp_setup_sock_ops(queue); | |
1992 | ||
32193789 SG |
1993 | if (idx) { |
1994 | nvme_tcp_set_queue_io_cpu(queue); | |
be42a33b | 1995 | ret = nvmf_connect_io_queue(nctrl, idx); |
32193789 | 1996 | } else |
3f2304f8 SG |
1997 | ret = nvmf_connect_admin_queue(nctrl); |
1998 | ||
1999 | if (!ret) { | |
88eaba80 | 2000 | set_bit(NVME_TCP_Q_LIVE, &queue->flags); |
3f2304f8 | 2001 | } else { |
88eaba80 SG |
2002 | if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) |
2003 | __nvme_tcp_stop_queue(queue); | |
3f2304f8 SG |
2004 | dev_err(nctrl->device, |
2005 | "failed to connect queue: %d ret=%d\n", idx, ret); | |
2006 | } | |
2007 | return ret; | |
2008 | } | |
2009 | ||
3f2304f8 SG |
2010 | static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) |
2011 | { | |
2012 | if (to_tcp_ctrl(ctrl)->async_req.pdu) { | |
ceb1e087 | 2013 | cancel_work_sync(&ctrl->async_event_work); |
3f2304f8 SG |
2014 | nvme_tcp_free_async_req(to_tcp_ctrl(ctrl)); |
2015 | to_tcp_ctrl(ctrl)->async_req.pdu = NULL; | |
2016 | } | |
2017 | ||
2018 | nvme_tcp_free_queue(ctrl, 0); | |
2019 | } | |
2020 | ||
2021 | static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl) | |
2022 | { | |
2023 | int i; | |
2024 | ||
2025 | for (i = 1; i < ctrl->queue_count; i++) | |
2026 | nvme_tcp_free_queue(ctrl, i); | |
2027 | } | |
2028 | ||
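/*
 * Issue the non-waiting stop for every I/O queue first and only then wait
 * for the sockets to drain, so the per-queue drain periods can overlap
 * rather than being serialized.
 */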
2029 | static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) | |
2030 | { | |
2031 | int i; | |
2032 | ||
2033 | for (i = 1; i < ctrl->queue_count; i++) | |
77e40bbc ML |
2034 | nvme_tcp_stop_queue_nowait(ctrl, i); |
2035 | for (i = 1; i < ctrl->queue_count; i++) | |
2036 | nvme_tcp_wait_queue(ctrl, i); | |
3f2304f8 SG |
2037 | } |
2038 | ||
09035f86 DW |
2039 | static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, |
2040 | int first, int last) | |
3f2304f8 | 2041 | { |
462b8b2d | 2042 | int i, ret; |
3f2304f8 | 2043 | |
09035f86 | 2044 | for (i = first; i < last; i++) { |
3f2304f8 SG |
2045 | ret = nvme_tcp_start_queue(ctrl, i); |
2046 | if (ret) | |
2047 | goto out_stop_queues; | |
2048 | } | |
2049 | ||
2050 | return 0; | |
2051 | ||
2052 | out_stop_queues: | |
09035f86 | 2053 | for (i--; i >= first; i--) |
3f2304f8 SG |
2054 | nvme_tcp_stop_queue(ctrl, i); |
2055 | return ret; | |
2056 | } | |
2057 | ||
2058 | static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) | |
2059 | { | |
2060 | int ret; | |
be8e82ca HR |
2061 | key_serial_t pskid = 0; |
2062 | ||
36389576 | 2063 | if (nvme_tcp_tls_configured(ctrl)) { |
adf22c52 HR |
2064 | if (ctrl->opts->tls_key) |
2065 | pskid = key_serial(ctrl->opts->tls_key); | |
e88a7595 | 2066 | else if (ctrl->opts->tls) { |
adf22c52 HR |
2067 | pskid = nvme_tls_psk_default(ctrl->opts->keyring, |
2068 | ctrl->opts->host->nqn, | |
2069 | ctrl->opts->subsysnqn); | |
36389576 HR |
2070 | if (!pskid) { |
2071 | dev_err(ctrl->device, "no valid PSK found\n"); | |
2072 | return -ENOKEY; | |
2073 | } | |
be8e82ca HR |
2074 | } |
2075 | } | |
3f2304f8 | 2076 | |
be8e82ca | 2077 | ret = nvme_tcp_alloc_queue(ctrl, 0, pskid); |
3f2304f8 | 2078 | if (ret) |
ef184b88 | 2079 | return ret; |
3f2304f8 SG |
2080 | |
2081 | ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); | |
2082 | if (ret) | |
2083 | goto out_free_queue; | |
2084 | ||
2085 | return 0; | |
2086 | ||
2087 | out_free_queue: | |
2088 | nvme_tcp_free_queue(ctrl, 0); | |
2089 | return ret; | |
2090 | } | |
2091 | ||
efb973b1 | 2092 | static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) |
3f2304f8 SG |
2093 | { |
2094 | int i, ret; | |
2095 | ||
e88a7595 HR |
2096 | if (nvme_tcp_tls_configured(ctrl)) { |
2097 | if (ctrl->opts->concat) { | |
2098 | /* | |
2099 | * The generated PSK is stored in the | |
2100 | * fabric options | |
2101 | */ | |
2102 | if (!ctrl->opts->tls_key) { | |
2103 | dev_err(ctrl->device, "no PSK generated\n"); | |
2104 | return -ENOKEY; | |
2105 | } | |
2106 | if (ctrl->tls_pskid && | |
2107 | ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) { | |
2108 | dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid); | |
2109 | ctrl->tls_pskid = 0; | |
2110 | } | |
2111 | } else if (!ctrl->tls_pskid) { | |
2112 | dev_err(ctrl->device, "no PSK negotiated\n"); | |
2113 | return -ENOKEY; | |
2114 | } | |
be8e82ca | 2115 | } |
36389576 | 2116 | |
3f2304f8 | 2117 | for (i = 1; i < ctrl->queue_count; i++) { |
be8e82ca | 2118 | ret = nvme_tcp_alloc_queue(ctrl, i, |
36389576 | 2119 | ctrl->tls_pskid); |
3f2304f8 SG |
2120 | if (ret) |
2121 | goto out_free_queues; | |
2122 | } | |
2123 | ||
2124 | return 0; | |
2125 | ||
2126 | out_free_queues: | |
2127 | for (i--; i >= 1; i--) | |
2128 | nvme_tcp_free_queue(ctrl, i); | |
2129 | ||
2130 | return ret; | |
2131 | } | |
2132 | ||
efb973b1 | 2133 | static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) |
3f2304f8 SG |
2134 | { |
2135 | unsigned int nr_io_queues; | |
2136 | int ret; | |
2137 | ||
a249d306 | 2138 | nr_io_queues = nvmf_nr_io_queues(ctrl->opts); |
3f2304f8 SG |
2139 | ret = nvme_set_queue_count(ctrl, &nr_io_queues); |
2140 | if (ret) | |
2141 | return ret; | |
2142 | ||
664227fd | 2143 | if (nr_io_queues == 0) { |
72f57242 SG |
2144 | dev_err(ctrl->device, |
2145 | "unable to set any I/O queues\n"); | |
2146 | return -ENOMEM; | |
2147 | } | |
3f2304f8 | 2148 | |
664227fd | 2149 | ctrl->queue_count = nr_io_queues + 1; |
3f2304f8 SG |
2150 | dev_info(ctrl->device, |
2151 | "creating %d I/O queues.\n", nr_io_queues); | |
2152 | ||
a249d306 KB |
2153 | nvmf_set_io_queues(ctrl->opts, nr_io_queues, |
2154 | to_tcp_ctrl(ctrl)->io_queues); | |
efb973b1 | 2155 | return __nvme_tcp_alloc_io_queues(ctrl); |
3f2304f8 SG |
2156 | } |
2157 | ||
3f2304f8 SG |
2158 | static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) |
2159 | { | |
09035f86 | 2160 | int ret, nr_queues; |
3f2304f8 | 2161 | |
efb973b1 | 2162 | ret = nvme_tcp_alloc_io_queues(ctrl); |
3f2304f8 SG |
2163 | if (ret) |
2164 | return ret; | |
2165 | ||
2166 | if (new) { | |
de777825 CH |
2167 | ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, |
2168 | &nvme_tcp_mq_ops, | |
dcef7727 | 2169 | ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, |
de777825 | 2170 | sizeof(struct nvme_tcp_request)); |
2f7a7e5d | 2171 | if (ret) |
3f2304f8 | 2172 | goto out_free_io_queues; |
3f2304f8 SG |
2173 | } |
2174 | ||
09035f86 DW |
2175 | /* |
2176 | * Only start IO queues for which we have allocated the tagset | |
2177 | * and limited it to the available queues. On reconnects, the |
2178 | * number of queues might have changed. |
2179 | */ | |
2180 | nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); | |
2181 | ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues); | |
3f2304f8 SG |
2182 | if (ret) |
2183 | goto out_cleanup_connect_q; | |
2184 | ||
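/*
 * On a reconnect, freeze and drain I/O before publishing the possibly
 * changed number of hw queues with blk_mq_update_nr_hw_queues(); if the
 * freeze does not complete within NVME_IO_TIMEOUT, fail the setup.
 */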
2875b0ae | 2185 | if (!new) { |
99dc2640 | 2186 | nvme_start_freeze(ctrl); |
9f27bd70 | 2187 | nvme_unquiesce_io_queues(ctrl); |
e5c01f4f SG |
2188 | if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { |
2189 | /* | |
2190 | * If we timed out waiting for freeze we are likely to | |
2191 | * be stuck. Fail the controller initialization just | |
2192 | * to be safe. | |
2193 | */ | |
2194 | ret = -ENODEV; | |
99dc2640 | 2195 | nvme_unfreeze(ctrl); |
e5c01f4f SG |
2196 | goto out_wait_freeze_timed_out; |
2197 | } | |
2875b0ae SG |
2198 | blk_mq_update_nr_hw_queues(ctrl->tagset, |
2199 | ctrl->queue_count - 1); | |
2200 | nvme_unfreeze(ctrl); | |
2201 | } | |
2202 | ||
09035f86 DW |
2203 | /* |
2204 | * If the number of queues has increased (reconnect case) | |
2205 | * start all new queues now. | |
2206 | */ | |
2207 | ret = nvme_tcp_start_io_queues(ctrl, nr_queues, | |
2208 | ctrl->tagset->nr_hw_queues + 1); | |
2209 | if (ret) | |
2210 | goto out_wait_freeze_timed_out; | |
2211 | ||
3f2304f8 SG |
2212 | return 0; |
2213 | ||
e5c01f4f | 2214 | out_wait_freeze_timed_out: |
9f27bd70 | 2215 | nvme_quiesce_io_queues(ctrl); |
70a99574 | 2216 | nvme_sync_io_queues(ctrl); |
e5c01f4f | 2217 | nvme_tcp_stop_io_queues(ctrl); |
3f2304f8 | 2218 | out_cleanup_connect_q: |
70a99574 | 2219 | nvme_cancel_tagset(ctrl); |
e85037a2 | 2220 | if (new) |
de777825 | 2221 | nvme_remove_io_tag_set(ctrl); |
3f2304f8 SG |
2222 | out_free_io_queues: |
2223 | nvme_tcp_free_io_queues(ctrl); | |
2224 | return ret; | |
2225 | } | |
2226 | ||
3f2304f8 SG |
2227 | static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) |
2228 | { | |
2229 | int error; | |
2230 | ||
2231 | error = nvme_tcp_alloc_admin_queue(ctrl); | |
2232 | if (error) | |
2233 | return error; | |
2234 | ||
2235 | if (new) { | |
de777825 CH |
2236 | error = nvme_alloc_admin_tag_set(ctrl, |
2237 | &to_tcp_ctrl(ctrl)->admin_tag_set, | |
db45e1a5 | 2238 | &nvme_tcp_admin_mq_ops, |
de777825 | 2239 | sizeof(struct nvme_tcp_request)); |
2f7a7e5d | 2240 | if (error) |
3f2304f8 | 2241 | goto out_free_queue; |
3f2304f8 SG |
2242 | } |
2243 | ||
2244 | error = nvme_tcp_start_queue(ctrl, 0); | |
2245 | if (error) | |
de777825 | 2246 | goto out_cleanup_tagset; |
3f2304f8 | 2247 | |
c0f2f45b | 2248 | error = nvme_enable_ctrl(ctrl); |
3f2304f8 SG |
2249 | if (error) |
2250 | goto out_stop_queue; | |
2251 | ||
9f27bd70 | 2252 | nvme_unquiesce_admin_queue(ctrl); |
e7832cb4 | 2253 | |
94cc781f | 2254 | error = nvme_init_ctrl_finish(ctrl, false); |
3f2304f8 | 2255 | if (error) |
70a99574 | 2256 | goto out_quiesce_queue; |
3f2304f8 SG |
2257 | |
2258 | return 0; | |
2259 | ||
70a99574 | 2260 | out_quiesce_queue: |
9f27bd70 | 2261 | nvme_quiesce_admin_queue(ctrl); |
70a99574 | 2262 | blk_sync_queue(ctrl->admin_q); |
3f2304f8 SG |
2263 | out_stop_queue: |
2264 | nvme_tcp_stop_queue(ctrl, 0); | |
70a99574 | 2265 | nvme_cancel_admin_tagset(ctrl); |
de777825 | 2266 | out_cleanup_tagset: |
e7832cb4 | 2267 | if (new) |
de777825 | 2268 | nvme_remove_admin_tag_set(ctrl); |
3f2304f8 SG |
2269 | out_free_queue: |
2270 | nvme_tcp_free_admin_queue(ctrl); | |
2271 | return error; | |
2272 | } | |
2273 | ||
2274 | static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, | |
2275 | bool remove) | |
2276 | { | |
9f27bd70 | 2277 | nvme_quiesce_admin_queue(ctrl); |
d6f66210 | 2278 | blk_sync_queue(ctrl->admin_q); |
3f2304f8 | 2279 | nvme_tcp_stop_queue(ctrl, 0); |
563c8158 | 2280 | nvme_cancel_admin_tagset(ctrl); |
b4e12f57 | 2281 | if (remove) { |
9f27bd70 | 2282 | nvme_unquiesce_admin_queue(ctrl); |
b4e12f57 C |
2283 | nvme_remove_admin_tag_set(ctrl); |
2284 | } | |
2285 | nvme_tcp_free_admin_queue(ctrl); | |
36389576 HR |
2286 | if (ctrl->tls_pskid) { |
2287 | dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n", | |
2288 | ctrl->tls_pskid); | |
2289 | ctrl->tls_pskid = 0; | |
2290 | } | |
3f2304f8 SG |
2291 | } |
2292 | ||
2293 | static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, | |
2294 | bool remove) | |
2295 | { | |
2296 | if (ctrl->queue_count <= 1) | |
d6f66210 | 2297 | return; |
9f27bd70 | 2298 | nvme_quiesce_io_queues(ctrl); |
d6f66210 | 2299 | nvme_sync_io_queues(ctrl); |
3f2304f8 | 2300 | nvme_tcp_stop_io_queues(ctrl); |
563c8158 | 2301 | nvme_cancel_tagset(ctrl); |
36e3b1f9 | 2302 | if (remove) { |
9f27bd70 | 2303 | nvme_unquiesce_io_queues(ctrl); |
36e3b1f9 C |
2304 | nvme_remove_io_tag_set(ctrl); |
2305 | } | |
2306 | nvme_tcp_free_io_queues(ctrl); | |
3f2304f8 SG |
2307 | } |
2308 | ||
adfde7ed HR |
2309 | static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, |
2310 | int status) | |
3f2304f8 | 2311 | { |
e6e7f7ac KB |
2312 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2313 | ||
3f2304f8 | 2314 | /* If we are resetting/deleting then do nothing */ |
e6e7f7ac KB |
2315 | if (state != NVME_CTRL_CONNECTING) { |
2316 | WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE); | |
3f2304f8 SG |
2317 | return; |
2318 | } | |
2319 | ||
adfde7ed | 2320 | if (nvmf_should_reconnect(ctrl, status)) { |
3f2304f8 SG |
2321 | dev_info(ctrl->device, "Reconnecting in %d seconds...\n", |
2322 | ctrl->opts->reconnect_delay); | |
2323 | queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, | |
2324 | ctrl->opts->reconnect_delay * HZ); | |
2325 | } else { | |
adfde7ed HR |
2326 | dev_info(ctrl->device, "Removing controller (%d)...\n", |
2327 | status); | |
3f2304f8 SG |
2328 | nvme_delete_ctrl(ctrl); |
2329 | } | |
2330 | } | |
2331 | ||
e88a7595 HR |
2332 | /* |
2333 | * The TLS key is set by secure concatenation after negotiation has been | |
2334 | * completed on the admin queue. We need to revoke the key when: | |
2335 | * - concatenation is enabled (otherwise it's a static key set by the user) | |
2336 | * and | |
2337 | * - the generated key is present in ctrl->tls_key (otherwise there's nothing | |
2338 | * to revoke) | |
2339 | * and | |
2340 | * - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS | |
2341 | * negotiation has not run). | |
2342 | * | |
2343 | * We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called | |
2344 | * twice during secure concatenation, once on a 'normal' connection to run the | |
2345 | * DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set), | |
2346 | * and once after the negotiation (which uses the key, so it _must_ be set). | |
2347 | */ | |
2348 | static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl) | |
2349 | { | |
2350 | return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid; | |
2351 | } | |
2352 | ||
3f2304f8 SG |
2353 | static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) |
2354 | { | |
2355 | struct nvmf_ctrl_options *opts = ctrl->opts; | |
312910f4 | 2356 | int ret; |
3f2304f8 SG |
2357 | |
2358 | ret = nvme_tcp_configure_admin_queue(ctrl, new); | |
2359 | if (ret) | |
2360 | return ret; | |
2361 | ||
73becfd6 | 2362 | if (ctrl->opts->concat && !ctrl->tls_pskid) { |
104d0e2f HR |
2363 | /* See comments for nvme_tcp_key_revoke_needed() */ |
2364 | dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); | |
2365 | nvme_stop_keep_alive(ctrl); | |
2366 | nvme_tcp_teardown_admin_queue(ctrl, false); | |
2367 | ret = nvme_tcp_configure_admin_queue(ctrl, false); | |
2368 | if (ret) | |
e7143706 | 2369 | goto destroy_admin; |
104d0e2f HR |
2370 | } |
2371 | ||
3f2304f8 | 2372 | if (ctrl->icdoff) { |
522af60c | 2373 | ret = -EOPNOTSUPP; |
3f2304f8 SG |
2374 | dev_err(ctrl->device, "icdoff is not supported!\n"); |
2375 | goto destroy_admin; | |
2376 | } | |
2377 | ||
3b54064f | 2378 | if (!nvme_ctrl_sgl_supported(ctrl)) { |
522af60c | 2379 | ret = -EOPNOTSUPP; |
73ffcefc MG |
2380 | dev_err(ctrl->device, "Mandatory sgls are not supported!\n"); |
2381 | goto destroy_admin; | |
2382 | } | |
2383 | ||
3f2304f8 SG |
2384 | if (opts->queue_size > ctrl->sqsize + 1) |
2385 | dev_warn(ctrl->device, | |
2386 | "queue_size %zu > ctrl sqsize %u, clamping down\n", | |
2387 | opts->queue_size, ctrl->sqsize + 1); | |
2388 | ||
2389 | if (ctrl->sqsize + 1 > ctrl->maxcmd) { | |
2390 | dev_warn(ctrl->device, | |
2391 | "sqsize %u > ctrl maxcmd %u, clamping down\n", | |
2392 | ctrl->sqsize + 1, ctrl->maxcmd); | |
2393 | ctrl->sqsize = ctrl->maxcmd - 1; | |
2394 | } | |
2395 | ||
2396 | if (ctrl->queue_count > 1) { | |
2397 | ret = nvme_tcp_configure_io_queues(ctrl, new); | |
2398 | if (ret) | |
2399 | goto destroy_admin; | |
2400 | } | |
2401 | ||
2402 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { | |
bea54ef5 | 2403 | /* |
ecca390e | 2404 | * state change failure is ok if we started ctrl delete, |
bea54ef5 IR |
2405 | * unless we are in the middle of creating a new controller, to |
2406 | * avoid races with the teardown flow. |
2407 | */ | |
e6e7f7ac KB |
2408 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2409 | ||
2410 | WARN_ON_ONCE(state != NVME_CTRL_DELETING && | |
2411 | state != NVME_CTRL_DELETING_NOIO); | |
bea54ef5 | 2412 | WARN_ON_ONCE(new); |
3f2304f8 SG |
2413 | ret = -EINVAL; |
2414 | goto destroy_io; | |
2415 | } | |
2416 | ||
2417 | nvme_start_ctrl(ctrl); | |
2418 | return 0; | |
2419 | ||
2420 | destroy_io: | |
70a99574 | 2421 | if (ctrl->queue_count > 1) { |
9f27bd70 | 2422 | nvme_quiesce_io_queues(ctrl); |
70a99574 CL |
2423 | nvme_sync_io_queues(ctrl); |
2424 | nvme_tcp_stop_io_queues(ctrl); | |
2425 | nvme_cancel_tagset(ctrl); | |
36e3b1f9 C |
2426 | if (new) |
2427 | nvme_remove_io_tag_set(ctrl); | |
2428 | nvme_tcp_free_io_queues(ctrl); | |
70a99574 | 2429 | } |
3f2304f8 | 2430 | destroy_admin: |
3af755a4 | 2431 | nvme_stop_keep_alive(ctrl); |
fec55c29 | 2432 | nvme_tcp_teardown_admin_queue(ctrl, new); |
3f2304f8 SG |
2433 | return ret; |
2434 | } | |
2435 | ||
2436 | static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) | |
2437 | { | |
2438 | struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), | |
2439 | struct nvme_tcp_ctrl, connect_work); | |
2440 | struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; | |
adfde7ed | 2441 | int ret; |
3f2304f8 SG |
2442 | |
2443 | ++ctrl->nr_reconnects; | |
2444 | ||
adfde7ed HR |
2445 | ret = nvme_tcp_setup_ctrl(ctrl, false); |
2446 | if (ret) | |
3f2304f8 SG |
2447 | goto requeue; |
2448 | ||
54a76c87 TI |
2449 | dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n", |
2450 | ctrl->nr_reconnects, ctrl->opts->max_reconnects); | |
3f2304f8 SG |
2451 | |
2452 | ctrl->nr_reconnects = 0; | |
2453 | ||
2454 | return; | |
2455 | ||
2456 | requeue: | |
54a76c87 TI |
2457 | dev_info(ctrl->device, "Failed reconnect attempt %d/%d\n", |
2458 | ctrl->nr_reconnects, ctrl->opts->max_reconnects); | |
adfde7ed | 2459 | nvme_tcp_reconnect_or_remove(ctrl, ret); |
3f2304f8 SG |
2460 | } |
2461 | ||
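/*
 * Error recovery: tear down the I/O and admin queues, unquiesce so that
 * pending requests fail fast, and move the controller to CONNECTING so the
 * reconnect work can rebuild the association (or remove the controller).
 */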
2462 | static void nvme_tcp_error_recovery_work(struct work_struct *work) | |
2463 | { | |
2464 | struct nvme_tcp_ctrl *tcp_ctrl = container_of(work, | |
2465 | struct nvme_tcp_ctrl, err_work); | |
2466 | struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; | |
2467 | ||
e88a7595 HR |
2468 | if (nvme_tcp_key_revoke_needed(ctrl)) |
2469 | nvme_auth_revoke_tls_key(ctrl); | |
3f2304f8 | 2470 | nvme_stop_keep_alive(ctrl); |
ff9fc7eb | 2471 | flush_work(&ctrl->async_event_work); |
3f2304f8 SG |
2472 | nvme_tcp_teardown_io_queues(ctrl, false); |
2473 | /* unquiesce to fail fast pending requests */ | |
9f27bd70 | 2474 | nvme_unquiesce_io_queues(ctrl); |
3f2304f8 | 2475 | nvme_tcp_teardown_admin_queue(ctrl, false); |
9f27bd70 | 2476 | nvme_unquiesce_admin_queue(ctrl); |
1f1a4f89 | 2477 | nvme_auth_stop(ctrl); |
3f2304f8 SG |
2478 | |
2479 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { | |
ecca390e | 2480 | /* state change failure is ok if we started ctrl delete */ |
e6e7f7ac KB |
2481 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2482 | ||
2483 | WARN_ON_ONCE(state != NVME_CTRL_DELETING && | |
2484 | state != NVME_CTRL_DELETING_NOIO); | |
3f2304f8 SG |
2485 | return; |
2486 | } | |
2487 | ||
adfde7ed | 2488 | nvme_tcp_reconnect_or_remove(ctrl, 0); |
3f2304f8 SG |
2489 | } |
2490 | ||
2491 | static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) | |
2492 | { | |
2493 | nvme_tcp_teardown_io_queues(ctrl, shutdown); | |
9f27bd70 | 2494 | nvme_quiesce_admin_queue(ctrl); |
285b6e9b | 2495 | nvme_disable_ctrl(ctrl, shutdown); |
3f2304f8 SG |
2496 | nvme_tcp_teardown_admin_queue(ctrl, shutdown); |
2497 | } | |
2498 | ||
2499 | static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl) | |
2500 | { | |
2501 | nvme_tcp_teardown_ctrl(ctrl, true); | |
2502 | } | |
2503 | ||
2504 | static void nvme_reset_ctrl_work(struct work_struct *work) | |
2505 | { | |
2506 | struct nvme_ctrl *ctrl = | |
2507 | container_of(work, struct nvme_ctrl, reset_work); | |
adfde7ed | 2508 | int ret; |
3f2304f8 | 2509 | |
e88a7595 HR |
2510 | if (nvme_tcp_key_revoke_needed(ctrl)) |
2511 | nvme_auth_revoke_tls_key(ctrl); | |
3f2304f8 SG |
2512 | nvme_stop_ctrl(ctrl); |
2513 | nvme_tcp_teardown_ctrl(ctrl, false); | |
2514 | ||
2515 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { | |
ecca390e | 2516 | /* state change failure is ok if we started ctrl delete */ |
e6e7f7ac KB |
2517 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2518 | ||
2519 | WARN_ON_ONCE(state != NVME_CTRL_DELETING && | |
2520 | state != NVME_CTRL_DELETING_NOIO); | |
3f2304f8 SG |
2521 | return; |
2522 | } | |
2523 | ||
adfde7ed HR |
2524 | ret = nvme_tcp_setup_ctrl(ctrl, false); |
2525 | if (ret) | |
3f2304f8 SG |
2526 | goto out_fail; |
2527 | ||
2528 | return; | |
2529 | ||
2530 | out_fail: | |
2531 | ++ctrl->nr_reconnects; | |
adfde7ed | 2532 | nvme_tcp_reconnect_or_remove(ctrl, ret); |
3f2304f8 SG |
2533 | } |
2534 | ||
f7f70f4a RL |
2535 | static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) |
2536 | { | |
c4abd875 | 2537 | flush_work(&to_tcp_ctrl(ctrl)->err_work); |
f7f70f4a RL |
2538 | cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); |
2539 | } | |
2540 | ||
3f2304f8 SG |
2541 | static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) |
2542 | { | |
2543 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
2544 | ||
2545 | if (list_empty(&ctrl->list)) | |
2546 | goto free_ctrl; | |
2547 | ||
2548 | mutex_lock(&nvme_tcp_ctrl_mutex); | |
2549 | list_del(&ctrl->list); | |
2550 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2551 | ||
2552 | nvmf_free_options(nctrl->opts); | |
2553 | free_ctrl: | |
2554 | kfree(ctrl->queues); | |
2555 | kfree(ctrl); | |
2556 | } | |
2557 | ||
2558 | static void nvme_tcp_set_sg_null(struct nvme_command *c) | |
2559 | { | |
2560 | struct nvme_sgl_desc *sg = &c->common.dptr.sgl; | |
2561 | ||
2562 | sg->addr = 0; | |
2563 | sg->length = 0; | |
2564 | sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | | |
2565 | NVME_SGL_FMT_TRANSPORT_A; | |
2566 | } | |
2567 | ||
2568 | static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue, | |
2569 | struct nvme_command *c, u32 data_len) | |
2570 | { | |
2571 | struct nvme_sgl_desc *sg = &c->common.dptr.sgl; | |
2572 | ||
2573 | sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); | |
2574 | sg->length = cpu_to_le32(data_len); | |
2575 | sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; | |
2576 | } | |
2577 | ||
2578 | static void nvme_tcp_set_sg_host_data(struct nvme_command *c, | |
2579 | u32 data_len) | |
2580 | { | |
2581 | struct nvme_sgl_desc *sg = &c->common.dptr.sgl; | |
2582 | ||
2583 | sg->addr = 0; | |
2584 | sg->length = cpu_to_le32(data_len); | |
2585 | sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | | |
2586 | NVME_SGL_FMT_TRANSPORT_A; | |
2587 | } | |
2588 | ||
2589 | static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) | |
2590 | { | |
2591 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg); | |
2592 | struct nvme_tcp_queue *queue = &ctrl->queues[0]; | |
2593 | struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu; | |
2594 | struct nvme_command *cmd = &pdu->cmd; | |
2595 | u8 hdgst = nvme_tcp_hdgst_len(queue); | |
2596 | ||
2597 | memset(pdu, 0, sizeof(*pdu)); | |
2598 | pdu->hdr.type = nvme_tcp_cmd; | |
2599 | if (queue->hdr_digest) | |
2600 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
2601 | pdu->hdr.hlen = sizeof(*pdu); | |
2602 | pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); | |
2603 | ||
2604 | cmd->common.opcode = nvme_admin_async_event; | |
2605 | cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH; | |
2606 | cmd->common.flags |= NVME_CMD_SGL_METABUF; | |
2607 | nvme_tcp_set_sg_null(cmd); | |
2608 | ||
2609 | ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU; | |
2610 | ctrl->async_req.offset = 0; | |
2611 | ctrl->async_req.curr_bio = NULL; | |
2612 | ctrl->async_req.data_len = 0; | |
0bf04c87 HR |
2613 | init_llist_node(&ctrl->async_req.lentry); |
2614 | INIT_LIST_HEAD(&ctrl->async_req.entry); | |
3f2304f8 | 2615 | |
674f872b | 2616 | nvme_tcp_queue_request(&ctrl->async_req, true); |
3f2304f8 SG |
2617 | } |
2618 | ||
236187c4 SG |
2619 | static void nvme_tcp_complete_timed_out(struct request *rq) |
2620 | { | |
2621 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
2622 | struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; | |
2623 | ||
236187c4 | 2624 | nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); |
93ba75c9 | 2625 | nvmf_complete_timed_out_request(rq); |
236187c4 SG |
2626 | } |
2627 | ||
9bdb4833 | 2628 | static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) |
3f2304f8 SG |
2629 | { |
2630 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
236187c4 | 2631 | struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; |
a3406352 | 2632 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
7d23e836 | 2633 | struct nvme_command *cmd = &pdu->cmd; |
99607843 | 2634 | int qid = nvme_tcp_queue_id(req->queue); |
3f2304f8 | 2635 | |
236187c4 | 2636 | dev_warn(ctrl->device, |
45c36f04 | 2637 | "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", |
7d23e836 CS |
2638 | rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode, |
2639 | nvme_fabrics_opcode_str(qid, cmd), qid); | |
3f2304f8 | 2640 | |
e6e7f7ac | 2641 | if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { |
39d57757 | 2642 | /* |
236187c4 SG |
2643 | * If we are resetting, connecting or deleting we should |
2644 | * complete immediately because we may block controller | |
2645 | * teardown or setup sequence | |
2646 | * - ctrl disable/shutdown fabrics requests | |
2647 | * - connect requests | |
2648 | * - initialization admin requests | |
2649 | * - I/O requests that entered after unquiescing and | |
2650 | * the controller stopped responding | |
2651 | * | |
2652 | * All other requests should be cancelled by the error | |
2653 | * recovery work, so it's fine that we fail it here. | |
39d57757 | 2654 | */ |
236187c4 | 2655 | nvme_tcp_complete_timed_out(rq); |
3f2304f8 SG |
2656 | return BLK_EH_DONE; |
2657 | } | |
2658 | ||
236187c4 SG |
2659 | /* |
2660 | * LIVE state should trigger the normal error recovery which will | |
2661 | * handle completing this request. | |
2662 | */ | |
2663 | nvme_tcp_error_recovery(ctrl); | |
3f2304f8 SG |
2664 | return BLK_EH_RESET_TIMER; |
2665 | } | |
2666 | ||
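/*
 * Pick the command's data SGL: a NULL SGL when there is no payload, an
 * in-capsule (inline) SGL for writes small enough to fit in the inline data
 * size, otherwise a transport SGL with the payload carried in separate data
 * PDUs.
 */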
2667 | static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, | |
2668 | struct request *rq) | |
2669 | { | |
2670 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
a3406352 | 2671 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
3f2304f8 SG |
2672 | struct nvme_command *c = &pdu->cmd; |
2673 | ||
2674 | c->common.flags |= NVME_CMD_SGL_METABUF; | |
2675 | ||
25e5cb78 SG |
2676 | if (!blk_rq_nr_phys_segments(rq)) |
2677 | nvme_tcp_set_sg_null(c); | |
2678 | else if (rq_data_dir(rq) == WRITE && | |
53ee9e29 | 2679 | req->data_len <= nvme_tcp_inline_data_size(req)) |
3f2304f8 SG |
2680 | nvme_tcp_set_sg_inline(queue, c, req->data_len); |
2681 | else | |
2682 | nvme_tcp_set_sg_host_data(c, req->data_len); | |
2683 | ||
2684 | return 0; | |
2685 | } | |
2686 | ||
2687 | static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, | |
2688 | struct request *rq) | |
2689 | { | |
2690 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
a3406352 | 2691 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
3f2304f8 SG |
2692 | struct nvme_tcp_queue *queue = req->queue; |
2693 | u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; | |
2694 | blk_status_t ret; | |
2695 | ||
f4b9e6c9 | 2696 | ret = nvme_setup_cmd(ns, rq); |
3f2304f8 SG |
2697 | if (ret) |
2698 | return ret; | |
2699 | ||
2700 | req->state = NVME_TCP_SEND_CMD_PDU; | |
1ba2e507 | 2701 | req->status = cpu_to_le16(NVME_SC_SUCCESS); |
3f2304f8 SG |
2702 | req->offset = 0; |
2703 | req->data_sent = 0; | |
2704 | req->pdu_len = 0; | |
2705 | req->pdu_sent = 0; | |
c2700d28 | 2706 | req->h2cdata_left = 0; |
25e5cb78 SG |
2707 | req->data_len = blk_rq_nr_phys_segments(rq) ? |
2708 | blk_rq_payload_bytes(rq) : 0; | |
3f2304f8 | 2709 | req->curr_bio = rq->bio; |
e11e5116 | 2710 | if (req->curr_bio && req->data_len) |
cb9b870f | 2711 | nvme_tcp_init_iter(req, rq_data_dir(rq)); |
3f2304f8 SG |
2712 | |
2713 | if (rq_data_dir(rq) == WRITE && | |
53ee9e29 | 2714 | req->data_len <= nvme_tcp_inline_data_size(req)) |
3f2304f8 | 2715 | req->pdu_len = req->data_len; |
3f2304f8 SG |
2716 | |
2717 | pdu->hdr.type = nvme_tcp_cmd; | |
2718 | pdu->hdr.flags = 0; | |
2719 | if (queue->hdr_digest) | |
2720 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
2721 | if (queue->data_digest && req->pdu_len) { | |
2722 | pdu->hdr.flags |= NVME_TCP_F_DDGST; | |
2723 | ddgst = nvme_tcp_ddgst_len(queue); | |
2724 | } | |
2725 | pdu->hdr.hlen = sizeof(*pdu); | |
2726 | pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0; | |
2727 | pdu->hdr.plen = | |
2728 | cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst); | |
2729 | ||
2730 | ret = nvme_tcp_map_data(queue, rq); | |
2731 | if (unlikely(ret)) { | |
28a4cac4 | 2732 | nvme_cleanup_cmd(rq); |
3f2304f8 SG |
2733 | dev_err(queue->ctrl->ctrl.device, |
2734 | "Failed to map data (%d)\n", ret); | |
2735 | return ret; | |
2736 | } | |
2737 | ||
2738 | return 0; | |
2739 | } | |
2740 | ||
86f0348a SG |
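/*
 * Called by blk-mq to flush a batch of requests that were queued without an
 * immediate kick (bd->last cleared); schedule io_work once if anything is
 * pending on the request list.
 */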
2741 | static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx) |
2742 | { | |
2743 | struct nvme_tcp_queue *queue = hctx->driver_data; | |
2744 | ||
2745 | if (!llist_empty(&queue->req_list)) | |
2746 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); | |
2747 | } | |
2748 | ||
3f2304f8 SG |
2749 | static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, |
2750 | const struct blk_mq_queue_data *bd) | |
2751 | { | |
2752 | struct nvme_ns *ns = hctx->queue->queuedata; | |
2753 | struct nvme_tcp_queue *queue = hctx->driver_data; | |
2754 | struct request *rq = bd->rq; | |
2755 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
2756 | bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags); | |
2757 | blk_status_t ret; | |
2758 | ||
a9715744 TC |
2759 | if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) |
2760 | return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); | |
3f2304f8 SG |
2761 | |
2762 | ret = nvme_tcp_setup_cmd_pdu(ns, rq); | |
2763 | if (unlikely(ret)) | |
2764 | return ret; | |
2765 | ||
6887fc64 | 2766 | nvme_start_request(rq); |
3f2304f8 | 2767 | |
674f872b | 2768 | nvme_tcp_queue_request(req, bd->last); |
3f2304f8 SG |
2769 | |
2770 | return BLK_STS_OK; | |
2771 | } | |
2772 | ||
a4e1d0b7 | 2773 | static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) |
873946f4 | 2774 | { |
06427ca0 | 2775 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); |
a249d306 KB |
2776 | |
2777 | nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues); | |
873946f4 SG |
2778 | } |
2779 | ||
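/*
 * Polling entry for HCTX_TYPE_POLL queues: busy-poll the socket when
 * possible, then reap received completions directly in the caller's context
 * and report how many were seen.
 */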
5a72e899 | 2780 | static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) |
1a9460ce SG |
2781 | { |
2782 | struct nvme_tcp_queue *queue = hctx->driver_data; | |
2783 | struct sock *sk = queue->sock->sk; | |
8c1624b6 | 2784 | int ret; |
1a9460ce | 2785 | |
f86e5bf8 SG |
2786 | if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) |
2787 | return 0; | |
2788 | ||
72e5d757 | 2789 | set_bit(NVME_TCP_Q_POLLING, &queue->flags); |
3f926af3 | 2790 | if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) |
1a9460ce | 2791 | sk_busy_loop(sk, true); |
8c1624b6 | 2792 | ret = nvme_tcp_try_recv(queue); |
72e5d757 | 2793 | clear_bit(NVME_TCP_Q_POLLING, &queue->flags); |
8c1624b6 | 2794 | return ret < 0 ? ret : queue->nr_cqe; |
1a9460ce SG |
2795 | } |
2796 | ||
02c57a82 MB |
2797 | static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) |
2798 | { | |
2799 | struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0]; | |
2800 | struct sockaddr_storage src_addr; | |
2801 | int ret, len; | |
2802 | ||
2803 | len = nvmf_get_address(ctrl, buf, size); | |
2804 | ||
782373ba HR |
2805 | if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) |
2806 | return len; | |
2807 | ||
76d54bf2 AM |
2808 | mutex_lock(&queue->queue_lock); |
2809 | ||
02c57a82 MB |
2810 | ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); |
2811 | if (ret > 0) { | |
2812 | if (len > 0) | |
2813 | len--; /* strip trailing newline */ | |
2814 | len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", | |
2815 | (len) ? "," : "", &src_addr); | |
2816 | } | |
782373ba | 2817 | |
76d54bf2 | 2818 | mutex_unlock(&queue->queue_lock); |
02c57a82 MB |
2819 | |
2820 | return len; | |
2821 | } | |
2822 | ||
6acbd961 | 2823 | static const struct blk_mq_ops nvme_tcp_mq_ops = { |
3f2304f8 | 2824 | .queue_rq = nvme_tcp_queue_rq, |
86f0348a | 2825 | .commit_rqs = nvme_tcp_commit_rqs, |
3f2304f8 SG |
2826 | .complete = nvme_complete_rq, |
2827 | .init_request = nvme_tcp_init_request, | |
2828 | .exit_request = nvme_tcp_exit_request, | |
2829 | .init_hctx = nvme_tcp_init_hctx, | |
2830 | .timeout = nvme_tcp_timeout, | |
873946f4 | 2831 | .map_queues = nvme_tcp_map_queues, |
1a9460ce | 2832 | .poll = nvme_tcp_poll, |
3f2304f8 SG |
2833 | }; |
2834 | ||
6acbd961 | 2835 | static const struct blk_mq_ops nvme_tcp_admin_mq_ops = { |
3f2304f8 SG |
2836 | .queue_rq = nvme_tcp_queue_rq, |
2837 | .complete = nvme_complete_rq, | |
2838 | .init_request = nvme_tcp_init_request, | |
2839 | .exit_request = nvme_tcp_exit_request, | |
2840 | .init_hctx = nvme_tcp_init_admin_hctx, | |
2841 | .timeout = nvme_tcp_timeout, | |
2842 | }; | |
2843 | ||
2844 | static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { | |
2845 | .name = "tcp", | |
2846 | .module = THIS_MODULE, | |
db45e1a5 | 2847 | .flags = NVME_F_FABRICS | NVME_F_BLOCKING, |
3f2304f8 SG |
2848 | .reg_read32 = nvmf_reg_read32, |
2849 | .reg_read64 = nvmf_reg_read64, | |
2850 | .reg_write32 = nvmf_reg_write32, | |
210b1f65 | 2851 | .subsystem_reset = nvmf_subsystem_reset, |
3f2304f8 SG |
2852 | .free_ctrl = nvme_tcp_free_ctrl, |
2853 | .submit_async_event = nvme_tcp_submit_async_event, | |
2854 | .delete_ctrl = nvme_tcp_delete_ctrl, | |
02c57a82 | 2855 | .get_address = nvme_tcp_get_address, |
f7f70f4a | 2856 | .stop_ctrl = nvme_tcp_stop_ctrl, |
3f2304f8 SG |
2857 | }; |
2858 | ||
2859 | static bool | |
2860 | nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) | |
2861 | { | |
2862 | struct nvme_tcp_ctrl *ctrl; | |
2863 | bool found = false; | |
2864 | ||
2865 | mutex_lock(&nvme_tcp_ctrl_mutex); | |
2866 | list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) { | |
2867 | found = nvmf_ip_options_match(&ctrl->ctrl, opts); | |
2868 | if (found) | |
2869 | break; | |
2870 | } | |
2871 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2872 | ||
2873 | return found; | |
2874 | } | |
2875 | ||
10fd7fb6 | 2876 | static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, |
3f2304f8 SG |
2877 | struct nvmf_ctrl_options *opts) |
2878 | { | |
2879 | struct nvme_tcp_ctrl *ctrl; | |
2880 | int ret; | |
2881 | ||
2882 | ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); | |
2883 | if (!ctrl) | |
2884 | return ERR_PTR(-ENOMEM); | |
2885 | ||
2886 | INIT_LIST_HEAD(&ctrl->list); | |
2887 | ctrl->ctrl.opts = opts; | |
1a9460ce SG |
2888 | ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + |
2889 | opts->nr_poll_queues + 1; | |
3f2304f8 SG |
2890 | ctrl->ctrl.sqsize = opts->queue_size - 1; |
2891 | ctrl->ctrl.kato = opts->kato; | |
2892 | ||
2893 | INIT_DELAYED_WORK(&ctrl->connect_work, | |
2894 | nvme_tcp_reconnect_ctrl_work); | |
2895 | INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); | |
2896 | INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); | |
2897 | ||
2898 | if (!(opts->mask & NVMF_OPT_TRSVCID)) { | |
2899 | opts->trsvcid = | |
2900 | kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL); | |
2901 | if (!opts->trsvcid) { | |
2902 | ret = -ENOMEM; | |
2903 | goto out_free_ctrl; | |
2904 | } | |
2905 | opts->mask |= NVMF_OPT_TRSVCID; | |
2906 | } | |
2907 | ||
2908 | ret = inet_pton_with_scope(&init_net, AF_UNSPEC, | |
2909 | opts->traddr, opts->trsvcid, &ctrl->addr); | |
2910 | if (ret) { | |
2911 | pr_err("malformed address passed: %s:%s\n", | |
2912 | opts->traddr, opts->trsvcid); | |
2913 | goto out_free_ctrl; | |
2914 | } | |
2915 | ||
2916 | if (opts->mask & NVMF_OPT_HOST_TRADDR) { | |
2917 | ret = inet_pton_with_scope(&init_net, AF_UNSPEC, | |
2918 | opts->host_traddr, NULL, &ctrl->src_addr); | |
2919 | if (ret) { | |
2920 | pr_err("malformed src address passed: %s\n", | |
2921 | opts->host_traddr); | |
2922 | goto out_free_ctrl; | |
2923 | } | |
2924 | } | |
2925 | ||
3ede8f72 | 2926 | if (opts->mask & NVMF_OPT_HOST_IFACE) { |
8b43ced6 | 2927 | if (!__dev_get_by_name(&init_net, opts->host_iface)) { |
3ede8f72 MB |
2928 | pr_err("invalid interface passed: %s\n", |
2929 | opts->host_iface); | |
2930 | ret = -ENODEV; | |
2931 | goto out_free_ctrl; | |
2932 | } | |
2933 | } | |
2934 | ||
3f2304f8 SG |
2935 | if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) { |
2936 | ret = -EALREADY; | |
2937 | goto out_free_ctrl; | |
2938 | } | |
2939 | ||
873946f4 | 2940 | ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), |
3f2304f8 SG |
2941 | GFP_KERNEL); |
2942 | if (!ctrl->queues) { | |
2943 | ret = -ENOMEM; | |
2944 | goto out_free_ctrl; | |
2945 | } | |
2946 | ||
2947 | ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0); | |
2948 | if (ret) | |
2949 | goto out_kfree_queues; | |
2950 | ||
10fd7fb6 KB |
2951 | return ctrl; |
2952 | out_kfree_queues: | |
2953 | kfree(ctrl->queues); | |
2954 | out_free_ctrl: | |
2955 | kfree(ctrl); | |
2956 | return ERR_PTR(ret); | |
2957 | } | |
2958 | ||
2959 | static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, | |
2960 | struct nvmf_ctrl_options *opts) | |
2961 | { | |
2962 | struct nvme_tcp_ctrl *ctrl; | |
2963 | int ret; | |
2964 | ||
2965 | ctrl = nvme_tcp_alloc_ctrl(dev, opts); | |
2966 | if (IS_ERR(ctrl)) | |
2967 | return ERR_CAST(ctrl); | |
2968 | ||
1a9e2181 KB |
2969 | ret = nvme_add_ctrl(&ctrl->ctrl); |
2970 | if (ret) | |
2971 | goto out_put_ctrl; | |
2972 | ||
3f2304f8 SG |
2973 | if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { |
2974 | WARN_ON_ONCE(1); | |
2975 | ret = -EINTR; | |
2976 | goto out_uninit_ctrl; | |
2977 | } | |
2978 | ||
2979 | ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true); | |
2980 | if (ret) | |
2981 | goto out_uninit_ctrl; | |
2982 | ||
524719b4 NY |
2983 | dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n", |
2984 | nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); | |
3f2304f8 | 2985 | |
3f2304f8 SG |
2986 | mutex_lock(&nvme_tcp_ctrl_mutex); |
2987 | list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); | |
2988 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2989 | ||
2990 | return &ctrl->ctrl; | |
2991 | ||
2992 | out_uninit_ctrl: | |
2993 | nvme_uninit_ctrl(&ctrl->ctrl); | |
1a9e2181 | 2994 | out_put_ctrl: |
3f2304f8 SG |
2995 | nvme_put_ctrl(&ctrl->ctrl); |
2996 | if (ret > 0) | |
2997 | ret = -EIO; | |
2998 | return ERR_PTR(ret); | |
3f2304f8 SG |
2999 | } |
3000 | ||
3001 | static struct nvmf_transport_ops nvme_tcp_transport = { | |
3002 | .name = "tcp", | |
3003 | .module = THIS_MODULE, | |
3004 | .required_opts = NVMF_OPT_TRADDR, | |
3005 | .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | | |
3006 | NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | | |
873946f4 | 3007 | NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | |
bb13985d | 3008 | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | |
adf22c52 | 3009 | NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | |
e88a7595 | 3010 | NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT, |
3f2304f8 SG |
3011 | .create_ctrl = nvme_tcp_create_ctrl, |
3012 | }; | |
3013 | ||
3014 | static int __init nvme_tcp_init_module(void) | |
3015 | { | |
0c29f9fa | 3016 | unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS; |
32193789 | 3017 | int cpu; |
0c29f9fa | 3018 | |
7e87965d SG |
3019 | BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); |
3020 | BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); | |
3021 | BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24); | |
3022 | BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24); | |
3023 | BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24); | |
3024 | BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128); | |
3025 | BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128); | |
3026 | BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); | |
3027 | ||
0c29f9fa LF |
3028 | if (wq_unbound) |
3029 | wq_flags |= WQ_UNBOUND; | |
3030 | ||
3031 | nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0); | |
3f2304f8 SG |
3032 | if (!nvme_tcp_wq) |
3033 | return -ENOMEM; | |
3034 | ||
32193789 SG |
3035 | for_each_possible_cpu(cpu) |
3036 | atomic_set(&nvme_tcp_cpu_queues[cpu], 0); | |
3037 | ||
3f2304f8 SG |
3038 | nvmf_register_transport(&nvme_tcp_transport); |
3039 | return 0; | |
3040 | } | |
3041 | ||
3042 | static void __exit nvme_tcp_cleanup_module(void) | |
3043 | { | |
3044 | struct nvme_tcp_ctrl *ctrl; | |
3045 | ||
3046 | nvmf_unregister_transport(&nvme_tcp_transport); | |
3047 | ||
3048 | mutex_lock(&nvme_tcp_ctrl_mutex); | |
3049 | list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) | |
3050 | nvme_delete_ctrl(&ctrl->ctrl); | |
3051 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
3052 | flush_workqueue(nvme_delete_wq); | |
3053 | ||
3054 | destroy_workqueue(nvme_tcp_wq); | |
3055 | } | |
3056 | ||
3057 | module_init(nvme_tcp_init_module); | |
3058 | module_exit(nvme_tcp_cleanup_module); | |
3059 | ||
92b0b0ff | 3060 | MODULE_DESCRIPTION("NVMe host TCP transport driver"); |
3f2304f8 | 3061 | MODULE_LICENSE("GPL v2"); |