// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP host.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/key.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
#include <net/busy_poll.h>
#include <trace/events/sock.h>

#include "nvme.h"
#include "fabrics.h"

struct nvme_tcp_queue;

/* Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value is sufficient to indicate general consideration of any
 * possible optimization. Making it a module param allows for alternative
 * values that may be unique to some NIC implementations.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");

/*
 * Use an unbound workqueue for nvme_tcp_wq so that the CPU affinity
 * can be set from sysfs.
 */
static bool wq_unbound;
module_param(wq_unbound, bool, 0644);
MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)");

/*
 * TLS handshake timeout
 */
static int tls_handshake_timeout = 10;
#ifdef CONFIG_NVME_TCP_TLS
module_param(tls_handshake_timeout, int, 0644);
MODULE_PARM_DESC(tls_handshake_timeout,
		 "nvme TLS handshake timeout in seconds (default 10)");
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* lockdep can detect a circular dependency of the form
 *   sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
 * because dependencies are tracked for both nvme-tcp and user contexts. Using
 * a separate class prevents lockdep from conflating nvme-tcp socket use with
 * user-space socket API use.
 */
static struct lock_class_key nvme_tcp_sk_key[2];
static struct lock_class_key nvme_tcp_slock_key[2];

static void nvme_tcp_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
		return;

	switch (sk->sk_family) {
	case AF_INET:
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
					      &nvme_tcp_slock_key[0],
					      "sk_lock-AF_INET-NVME",
					      &nvme_tcp_sk_key[0]);
		break;
	case AF_INET6:
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
					      &nvme_tcp_slock_key[1],
					      "sk_lock-AF_INET6-NVME",
					      &nvme_tcp_sk_key[1]);
		break;
	default:
		WARN_ON_ONCE(1);
	}
}
#else
static void nvme_tcp_reclassify_socket(struct socket *sock) { }
#endif

enum nvme_tcp_send_state {
	NVME_TCP_SEND_CMD_PDU = 0,
	NVME_TCP_SEND_H2C_PDU,
	NVME_TCP_SEND_DATA,
	NVME_TCP_SEND_DDGST,
};

struct nvme_tcp_request {
	struct nvme_request	req;
	void			*pdu;
	struct nvme_tcp_queue	*queue;
	u32			data_len;
	u32			pdu_len;
	u32			pdu_sent;
	u32			h2cdata_left;
	u32			h2cdata_offset;
	u16			ttag;
	__le16			status;
	struct list_head	entry;
	struct llist_node	lentry;
	__le32			ddgst;

	struct bio		*curr_bio;
	struct iov_iter		iter;

	/* send state */
	size_t			offset;
	size_t			data_sent;
	enum nvme_tcp_send_state state;
};

enum nvme_tcp_queue_flags {
	NVME_TCP_Q_ALLOCATED	= 0,
	NVME_TCP_Q_LIVE		= 1,
	NVME_TCP_Q_POLLING	= 2,
};

enum nvme_tcp_recv_state {
	NVME_TCP_RECV_PDU = 0,
	NVME_TCP_RECV_DATA,
	NVME_TCP_RECV_DDGST,
};

struct nvme_tcp_ctrl;
struct nvme_tcp_queue {
	struct socket		*sock;
	struct work_struct	io_work;
	int			io_cpu;

	struct mutex		queue_lock;
	struct mutex		send_mutex;
	struct llist_head	req_list;
	struct list_head	send_list;

	/* recv state */
	void			*pdu;
	int			pdu_remaining;
	int			pdu_offset;
	size_t			data_remaining;
	size_t			ddgst_remaining;
	unsigned int		nr_cqe;

	/* send state */
	struct nvme_tcp_request *request;

	u32			maxh2cdata;
	size_t			cmnd_capsule_len;
	struct nvme_tcp_ctrl	*ctrl;
	unsigned long		flags;
	bool			rd_enabled;

	bool			hdr_digest;
	bool			data_digest;
	bool			tls_enabled;
	struct ahash_request	*rcv_hash;
	struct ahash_request	*snd_hash;
	__le32			exp_ddgst;
	__le32			recv_ddgst;
	struct completion	tls_complete;
	int			tls_err;
	struct page_frag_cache	pf_cache;

	void (*state_change)(struct sock *);
	void (*data_ready)(struct sock *);
	void (*write_space)(struct sock *);
};

struct nvme_tcp_ctrl {
	/* read only in the hot path */
	struct nvme_tcp_queue	*queues;
	struct blk_mq_tag_set	tag_set;

	/* other member variables */
	struct list_head	list;
	struct blk_mq_tag_set	admin_tag_set;
	struct sockaddr_storage addr;
	struct sockaddr_storage src_addr;
	struct nvme_ctrl	ctrl;

	struct work_struct	err_work;
	struct delayed_work	connect_work;
	struct nvme_tcp_request async_req;
	u32			io_queues[HCTX_MAX_TYPES];
};

static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
static const struct blk_mq_ops nvme_tcp_mq_ops;
static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);

static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
}

static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
{
	return queue - queue->ctrl->queues;
}

/*
 * Check if the queue is TLS encrypted
 */
static inline bool nvme_tcp_queue_tls(struct nvme_tcp_queue *queue)
{
	if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
		return false;

	return queue->tls_enabled;
}

/*
 * Check if TLS is configured for the controller.
 */
static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl)
{
	if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
		return false;

	return ctrl->opts->tls;
}

static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
{
	u32 queue_idx = nvme_tcp_queue_id(queue);

	if (queue_idx == 0)
		return queue->ctrl->admin_tag_set.tags[queue_idx];
	return queue->ctrl->tag_set.tags[queue_idx - 1];
}

static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
{
	return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
{
	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req)
{
	return req->pdu;
}

static inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req)
{
	/* use the pdu space in the back for the data pdu */
	return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) -
		sizeof(struct nvme_tcp_data_pdu);
}

static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
{
	if (nvme_is_fabrics(req->req.cmd))
		return NVME_TCP_ADMIN_CCSZ;
	return req->queue->cmnd_capsule_len - sizeof(struct nvme_command);
}

static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
{
	return req == &req->queue->ctrl->async_req;
}

static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
{
	struct request *rq;

	if (unlikely(nvme_tcp_async_req(req)))
		return false; /* async events don't have a request */

	rq = blk_mq_rq_from_pdu(req);

	return rq_data_dir(rq) == WRITE && req->data_len &&
		req->data_len <= nvme_tcp_inline_data_size(req);
}

static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
{
	return req->iter.bvec->bv_page;
}

static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
{
	return req->iter.bvec->bv_offset + req->iter.iov_offset;
}

static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
{
	return min_t(size_t, iov_iter_single_seg_count(&req->iter),
			req->pdu_len - req->pdu_sent);
}

static inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
{
	return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
			req->pdu_len - req->pdu_sent : 0;
}

static inline bool nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
		int len)
{
	return nvme_tcp_pdu_data_left(req) <= len;
}

static void nvme_tcp_init_iter(struct nvme_tcp_request *req,
		unsigned int dir)
{
	struct request *rq = blk_mq_rq_from_pdu(req);
	struct bio_vec *vec;
	unsigned int size;
	int nr_bvec;
	size_t offset;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
		vec = &rq->special_vec;
		nr_bvec = 1;
		size = blk_rq_payload_bytes(rq);
		offset = 0;
	} else {
		struct bio *bio = req->curr_bio;
		struct bvec_iter bi;
		struct bio_vec bv;

		vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
		nr_bvec = 0;
		bio_for_each_bvec(bv, bio, bi) {
			nr_bvec++;
		}
		size = bio->bi_iter.bi_size;
		offset = bio->bi_iter.bi_bvec_done;
	}

	iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size);
	req->iter.iov_offset = offset;
}

static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
		int len)
{
	req->data_sent += len;
	req->pdu_sent += len;
	iov_iter_advance(&req->iter, len);
	if (!iov_iter_count(&req->iter) &&
	    req->data_sent < req->data_len) {
		req->curr_bio = req->curr_bio->bi_next;
		nvme_tcp_init_iter(req, ITER_SOURCE);
	}
}

static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
{
	int ret;

	/* drain the send queue as much as we can... */
	do {
		ret = nvme_tcp_try_send(queue);
	} while (ret > 0);
}

static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)
{
	return !list_empty(&queue->send_list) ||
		!llist_empty(&queue->req_list);
}

static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
	return !nvme_tcp_queue_tls(queue) &&
		nvme_tcp_queue_has_pending(queue);
}

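/*
 * Request submission path: submitters push requests onto the lock-free
 * ->req_list from any context; the io context later splices them onto
 * the private ->send_list under ->send_mutex. A submitter that already
 * runs on the queue's io_cpu may grab ->send_mutex and transmit
 * directly, avoiding the workqueue round-trip.
 */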
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
		bool sync, bool last)
{
	struct nvme_tcp_queue *queue = req->queue;
	bool empty;

	empty = llist_add(&req->lentry, &queue->req_list) &&
		list_empty(&queue->send_list) && !queue->request;

	/*
	 * If we are the first on the send_list, try to send directly;
	 * otherwise queue io_work. Also, only do that if we are on the
	 * same cpu, so we don't introduce contention.
	 */
	if (queue->io_cpu == raw_smp_processor_id() &&
	    sync && empty && mutex_trylock(&queue->send_mutex)) {
		nvme_tcp_send_all(queue);
		mutex_unlock(&queue->send_mutex);
	}

	if (last && nvme_tcp_queue_has_pending(queue))
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}

static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;
	struct llist_node *node;

	for (node = llist_del_all(&queue->req_list); node; node = node->next) {
		req = llist_entry(node, struct nvme_tcp_request, lentry);
		list_add(&req->entry, &queue->send_list);
	}
}

static inline struct nvme_tcp_request *
nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;

	req = list_first_entry_or_null(&queue->send_list,
			struct nvme_tcp_request, entry);
	if (!req) {
		nvme_tcp_process_req_list(queue);
		req = list_first_entry_or_null(&queue->send_list,
				struct nvme_tcp_request, entry);
		if (unlikely(!req))
			return NULL;
	}

	list_del(&req->entry);
	return req;
}

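/*
 * Digest helpers: NVMe/TCP protects PDU headers (HDGST) and data
 * payloads (DDGST) with CRC32C, computed here through the crypto ahash
 * API using the "crc32c" tfm set up in nvme_tcp_alloc_crypto().
 */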
static inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
		__le32 *dgst)
{
	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
	crypto_ahash_final(hash);
}

static inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
		struct page *page, off_t off, size_t len)
{
	struct scatterlist sg;

	sg_init_table(&sg, 1);
	sg_set_page(&sg, page, len, off);
	ahash_request_set_crypt(hash, &sg, NULL, len);
	crypto_ahash_update(hash);
}

static inline void nvme_tcp_hdgst(struct ahash_request *hash,
		void *pdu, size_t len)
{
	struct scatterlist sg;

	sg_init_one(&sg, pdu, len);
	ahash_request_set_crypt(hash, &sg, pdu + len, len);
	crypto_ahash_digest(hash);
}

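/*
 * Verify a received header digest by recomputing it in place:
 * nvme_tcp_hdgst() stores the computed digest right behind the header,
 * overwriting the wire value, so the received digest is saved first
 * and compared afterwards.
 */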
static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
		void *pdu, size_t pdu_len)
{
	struct nvme_tcp_hdr *hdr = pdu;
	__le32 recv_digest;
	__le32 exp_digest;

	if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d: header digest flag is cleared\n",
			nvme_tcp_queue_id(queue));
		return -EPROTO;
	}

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
	exp_digest = *(__le32 *)(pdu + hdr->hlen);
	if (recv_digest != exp_digest) {
		dev_err(queue->ctrl->ctrl.device,
			"header digest error: recv %#x expected %#x\n",
			le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
		return -EIO;
	}

	return 0;
}

static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
{
	struct nvme_tcp_hdr *hdr = pdu;
	u8 digest_len = nvme_tcp_hdgst_len(queue);
	u32 len;

	len = le32_to_cpu(hdr->plen) - hdr->hlen -
		((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);

	if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d: data digest flag is cleared\n",
			nvme_tcp_queue_id(queue));
		return -EPROTO;
	}
	crypto_ahash_init(queue->rcv_hash);

	return 0;
}

static void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx)
{
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

	page_frag_free(req->pdu);
}

static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
		struct request *rq, unsigned int hctx_idx,
		unsigned int numa_node)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_tcp_cmd_pdu *pdu;
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	req->pdu = page_frag_alloc(&queue->pf_cache,
		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
		GFP_KERNEL | __GFP_ZERO);
	if (!req->pdu)
		return -ENOMEM;

	pdu = req->pdu;
	req->queue = queue;
	nvme_req(rq)->ctrl = &ctrl->ctrl;
	nvme_req(rq)->cmd = &pdu->cmd;

	return 0;
}

static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data);
	struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];

	hctx->driver_data = queue;
	return 0;
}

static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data);
	struct nvme_tcp_queue *queue = &ctrl->queues[0];

	hctx->driver_data = queue;
	return 0;
}

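/*
 * Receive state machine: collect the PDU header first (pdu_remaining),
 * then any pending data digest trailer (ddgst_remaining), otherwise
 * C2H payload bytes. The DDGST check must precede the DATA check
 * because data_remaining has already dropped to zero by the time the
 * digest trailer is being collected.
 */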
static enum nvme_tcp_recv_state
nvme_tcp_recv_state(struct nvme_tcp_queue *queue)
{
	return  (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
		(queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
		NVME_TCP_RECV_DATA;
}

static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
{
	queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
				nvme_tcp_hdgst_len(queue);
	queue->pdu_offset = 0;
	queue->data_remaining = -1;
	queue->ddgst_remaining = 0;
}

static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
		return;

	dev_warn(ctrl->device, "starting error recovery\n");
	queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
}

static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
		struct nvme_completion *cqe)
{
	struct nvme_tcp_request *req;
	struct request *rq;

	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"got bad cqe.command_id %#x on queue %d\n",
			cqe->command_id, nvme_tcp_queue_id(queue));
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		return -EINVAL;
	}

	req = blk_mq_rq_to_pdu(rq);
	if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
		req->status = cqe->status;

	if (!nvme_try_complete_req(rq, req->status, cqe->result))
		nvme_complete_rq(rq);
	queue->nr_cqe++;

	return 0;
}

static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
		struct nvme_tcp_data_pdu *pdu)
{
	struct request *rq;

	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"got bad c2hdata.command_id %#x on queue %d\n",
			pdu->command_id, nvme_tcp_queue_id(queue));
		return -ENOENT;
	}

	if (!blk_rq_payload_bytes(rq)) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x unexpected data\n",
			nvme_tcp_queue_id(queue), rq->tag);
		return -EIO;
	}

	queue->data_remaining = le32_to_cpu(pdu->data_length);

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
	    unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
		dev_err(queue->ctrl->ctrl.device,
			"queue %d tag %#x SUCCESS set but not last PDU\n",
			nvme_tcp_queue_id(queue), rq->tag);
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		return -EPROTO;
	}

	return 0;
}

static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
		struct nvme_tcp_rsp_pdu *pdu)
{
	struct nvme_completion *cqe = &pdu->cqe;
	int ret = 0;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts. We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
				     cqe->command_id)))
		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
				&cqe->result);
	else
		ret = nvme_tcp_process_nvme_cqe(queue, cqe);

	return ret;
}

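/*
 * Build one H2CData PDU for the current R2T. Transfers larger than the
 * MAXH2CDATA limit advertised by the controller in the ICResp are split
 * into multiple PDUs: h2cdata_left tracks the remainder and
 * NVME_TCP_F_DATA_LAST is only set on the final chunk.
 */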
static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req);
	struct nvme_tcp_queue *queue = req->queue;
	struct request *rq = blk_mq_rq_from_pdu(req);
	u32 h2cdata_sent = req->pdu_len;
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	u8 ddgst = nvme_tcp_ddgst_len(queue);

	req->state = NVME_TCP_SEND_H2C_PDU;
	req->offset = 0;
	req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
	req->pdu_sent = 0;
	req->h2cdata_left -= req->pdu_len;
	req->h2cdata_offset += h2cdata_sent;

	memset(data, 0, sizeof(*data));
	data->hdr.type = nvme_tcp_h2c_data;
	if (!req->h2cdata_left)
		data->hdr.flags = NVME_TCP_F_DATA_LAST;
	if (queue->hdr_digest)
		data->hdr.flags |= NVME_TCP_F_HDGST;
	if (queue->data_digest)
		data->hdr.flags |= NVME_TCP_F_DDGST;
	data->hdr.hlen = sizeof(*data);
	data->hdr.pdo = data->hdr.hlen + hdgst;
	data->hdr.plen =
		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
	data->ttag = req->ttag;
	data->command_id = nvme_cid(rq);
	data->data_offset = cpu_to_le32(req->h2cdata_offset);
	data->data_length = cpu_to_le32(req->pdu_len);
}

static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
		struct nvme_tcp_r2t_pdu *pdu)
{
	struct nvme_tcp_request *req;
	struct request *rq;
	u32 r2t_length = le32_to_cpu(pdu->r2t_length);
	u32 r2t_offset = le32_to_cpu(pdu->r2t_offset);

	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
	if (!rq) {
		dev_err(queue->ctrl->ctrl.device,
			"got bad r2t.command_id %#x on queue %d\n",
			pdu->command_id, nvme_tcp_queue_id(queue));
		return -ENOENT;
	}
	req = blk_mq_rq_to_pdu(rq);

	if (unlikely(!r2t_length)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d r2t len is %u, probably a bug...\n",
			rq->tag, r2t_length);
		return -EPROTO;
	}

	if (unlikely(req->data_sent + r2t_length > req->data_len)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
			rq->tag, r2t_length, req->data_len, req->data_sent);
		return -EPROTO;
	}

	if (unlikely(r2t_offset < req->data_sent)) {
		dev_err(queue->ctrl->ctrl.device,
			"req %d unexpected r2t offset %u (expected %zu)\n",
			rq->tag, r2t_offset, req->data_sent);
		return -EPROTO;
	}

	req->pdu_len = 0;
	req->h2cdata_left = r2t_length;
	req->h2cdata_offset = r2t_offset;
	req->ttag = pdu->ttag;

	nvme_tcp_setup_h2c_data_pdu(req);
	nvme_tcp_queue_request(req, false, true);

	return 0;
}

static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
		unsigned int *offset, size_t *len)
{
	struct nvme_tcp_hdr *hdr;
	char *pdu = queue->pdu;
	size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
	int ret;

	ret = skb_copy_bits(skb, *offset,
		&pdu[queue->pdu_offset], rcv_len);
	if (unlikely(ret))
		return ret;

	queue->pdu_remaining -= rcv_len;
	queue->pdu_offset += rcv_len;
	*offset += rcv_len;
	*len -= rcv_len;
	if (queue->pdu_remaining)
		return 0;

	hdr = queue->pdu;
	if (queue->hdr_digest) {
		ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
		if (unlikely(ret))
			return ret;
	}

	if (queue->data_digest) {
		ret = nvme_tcp_check_ddgst(queue, queue->pdu);
		if (unlikely(ret))
			return ret;
	}

	switch (hdr->type) {
	case nvme_tcp_c2h_data:
		return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
	case nvme_tcp_rsp:
		nvme_tcp_init_recv_ctx(queue);
		return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
	case nvme_tcp_r2t:
		nvme_tcp_init_recv_ctx(queue);
		return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
	default:
		dev_err(queue->ctrl->ctrl.device,
			"unsupported pdu type (%d)\n", hdr->type);
		return -EINVAL;
	}
}

static inline void nvme_tcp_end_request(struct request *rq, u16 status)
{
	union nvme_result res = {};

	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
		nvme_complete_rq(rq);
}

static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
		unsigned int *offset, size_t *len)
{
	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
	struct request *rq =
		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

	while (true) {
		int recv_len, ret;

		recv_len = min_t(size_t, *len, queue->data_remaining);
		if (!recv_len)
			break;

		if (!iov_iter_count(&req->iter)) {
			req->curr_bio = req->curr_bio->bi_next;

			/*
			 * If we don't have any bios it means the controller
			 * sent more data than we requested, hence error
			 */
			if (!req->curr_bio) {
				dev_err(queue->ctrl->ctrl.device,
					"queue %d no space in request %#x",
					nvme_tcp_queue_id(queue), rq->tag);
				nvme_tcp_init_recv_ctx(queue);
				return -EIO;
			}
			nvme_tcp_init_iter(req, ITER_DEST);
		}

		/* we can read only from what is left in this bio */
		recv_len = min_t(size_t, recv_len,
				iov_iter_count(&req->iter));

		if (queue->data_digest)
			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
				&req->iter, recv_len, queue->rcv_hash);
		else
			ret = skb_copy_datagram_iter(skb, *offset,
					&req->iter, recv_len);
		if (ret) {
			dev_err(queue->ctrl->ctrl.device,
				"queue %d failed to copy request %#x data",
				nvme_tcp_queue_id(queue), rq->tag);
			return ret;
		}

		*len -= recv_len;
		*offset += recv_len;
		queue->data_remaining -= recv_len;
	}

	if (!queue->data_remaining) {
		if (queue->data_digest) {
			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
		} else {
			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
				nvme_tcp_end_request(rq,
						le16_to_cpu(req->status));
				queue->nr_cqe++;
			}
			nvme_tcp_init_recv_ctx(queue);
		}
	}

	return 0;
}

static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
		struct sk_buff *skb, unsigned int *offset, size_t *len)
{
	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
	char *ddgst = (char *)&queue->recv_ddgst;
	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
	int ret;

	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
	if (unlikely(ret))
		return ret;

	queue->ddgst_remaining -= recv_len;
	*offset += recv_len;
	*len -= recv_len;
	if (queue->ddgst_remaining)
		return 0;

	if (queue->recv_ddgst != queue->exp_ddgst) {
		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
					pdu->command_id);
		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

		req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);

		dev_err(queue->ctrl->ctrl.device,
			"data digest error: recv %#x expected %#x\n",
			le32_to_cpu(queue->recv_ddgst),
			le32_to_cpu(queue->exp_ddgst));
	}

	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
					pdu->command_id);
		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);

		nvme_tcp_end_request(rq, le16_to_cpu(req->status));
		queue->nr_cqe++;
	}

	nvme_tcp_init_recv_ctx(queue);
	return 0;
}

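/*
 * ->read_sock() callback: consume as much of the skb as the current
 * receive state allows, cycling through PDU/DATA/DDGST until the skb
 * is drained. Any failure disables further reads and kicks error
 * recovery.
 */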
static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
			     unsigned int offset, size_t len)
{
	struct nvme_tcp_queue *queue = desc->arg.data;
	size_t consumed = len;
	int result;

	if (unlikely(!queue->rd_enabled))
		return -EFAULT;

	while (len) {
		switch (nvme_tcp_recv_state(queue)) {
		case NVME_TCP_RECV_PDU:
			result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
			break;
		case NVME_TCP_RECV_DATA:
			result = nvme_tcp_recv_data(queue, skb, &offset, &len);
			break;
		case NVME_TCP_RECV_DDGST:
			result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
			break;
		default:
			result = -EFAULT;
		}
		if (result) {
			dev_err(queue->ctrl->ctrl.device,
				"receive failed: %d\n", result);
			queue->rd_enabled = false;
			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
			return result;
		}
	}

	return consumed;
}

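/*
 * Socket callbacks, run under sk_callback_lock in softirq context.
 * They do no real work themselves: data_ready and write_space merely
 * schedule io_work (queues marked NVME_TCP_Q_POLLING are reaped from
 * the polling path instead), while state_change starts error recovery
 * on a dying connection before chaining to the saved callback.
 */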
static void nvme_tcp_data_ready(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	trace_sk_data_ready(sk);

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue && queue->rd_enabled) &&
	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvme_tcp_write_space(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue && sk_stream_is_writeable(sk))) {
		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
	}
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvme_tcp_state_change(struct sock *sk)
{
	struct nvme_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (!queue)
		goto done;

	switch (sk->sk_state) {
	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
	case TCP_LAST_ACK:
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
		break;
	default:
		dev_info(queue->ctrl->ctrl.device,
			"queue %d socket state %d\n",
			nvme_tcp_queue_id(queue), sk->sk_state);
	}

	queue->state_change(sk);
done:
	read_unlock_bh(&sk->sk_callback_lock);
}

static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{
	queue->request = NULL;
}

static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
{
	if (nvme_tcp_async_req(req)) {
		union nvme_result res = {};

		nvme_complete_async_event(&req->queue->ctrl->ctrl,
				cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res);
	} else {
		nvme_tcp_end_request(blk_mq_rq_from_pdu(req),
				NVME_SC_HOST_PATH_ERROR);
	}
}

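/*
 * Payload transmit path. MSG_SPLICE_PAGES lets the network stack take
 * page references instead of copying; sendpages_ok() screens out pages
 * (e.g. slab-backed buffers) that must not be spliced, in which case
 * the flag is dropped and the data is sent by copy.
 */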
static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	int req_data_len = req->data_len;
	u32 h2cdata_left = req->h2cdata_left;

	while (true) {
		struct bio_vec bvec;
		struct msghdr msg = {
			.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
		};
		struct page *page = nvme_tcp_req_cur_page(req);
		size_t offset = nvme_tcp_req_cur_offset(req);
		size_t len = nvme_tcp_req_cur_length(req);
		bool last = nvme_tcp_pdu_last_send(req, len);
		int req_data_sent = req->data_sent;
		int ret;

		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
			msg.msg_flags |= MSG_EOR;
		else
			msg.msg_flags |= MSG_MORE;

		if (!sendpages_ok(page, len, offset))
			msg.msg_flags &= ~MSG_SPLICE_PAGES;

		bvec_set_page(&bvec, page, len, offset);
		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
		ret = sock_sendmsg(queue->sock, &msg);
		if (ret <= 0)
			return ret;

		if (queue->data_digest)
			nvme_tcp_ddgst_update(queue->snd_hash, page,
					offset, ret);

		/*
		 * update the request iterator except for the last payload send
		 * in the request where we don't want to modify it as we may
		 * compete with the RX path completing the request.
		 */
		if (req_data_sent + ret < req_data_len)
			nvme_tcp_advance_req(req, ret);

		/* fully successful last send in current PDU */
		if (last && ret == len) {
			if (queue->data_digest) {
				nvme_tcp_ddgst_final(queue->snd_hash,
					&req->ddgst);
				req->state = NVME_TCP_SEND_DDGST;
				req->offset = 0;
			} else {
				if (h2cdata_left)
					nvme_tcp_setup_h2c_data_pdu(req);
				else
					nvme_tcp_done_send_req(queue);
			}
			return 1;
		}
	}
	return -EAGAIN;
}

static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
	struct bio_vec bvec;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, };
	bool inline_data = nvme_tcp_has_inline_data(req);
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	int len = sizeof(*pdu) + hdgst - req->offset;
	int ret;

	if (inline_data || nvme_tcp_queue_more(queue))
		msg.msg_flags |= MSG_MORE;
	else
		msg.msg_flags |= MSG_EOR;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
	ret = sock_sendmsg(queue->sock, &msg);
	if (unlikely(ret <= 0))
		return ret;

	len -= ret;
	if (!len) {
		if (inline_data) {
			req->state = NVME_TCP_SEND_DATA;
			if (queue->data_digest)
				crypto_ahash_init(queue->snd_hash);
		} else {
			nvme_tcp_done_send_req(queue);
		}
		return 1;
	}
	req->offset += ret;

	return -EAGAIN;
}

static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
	struct bio_vec bvec;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, };
	u8 hdgst = nvme_tcp_hdgst_len(queue);
	int len = sizeof(*pdu) - req->offset + hdgst;
	int ret;

	if (queue->hdr_digest && !req->offset)
		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

	if (!req->h2cdata_left)
		msg.msg_flags |= MSG_SPLICE_PAGES;

	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
	ret = sock_sendmsg(queue->sock, &msg);
	if (unlikely(ret <= 0))
		return ret;

	len -= ret;
	if (!len) {
		req->state = NVME_TCP_SEND_DATA;
		if (queue->data_digest)
			crypto_ahash_init(queue->snd_hash);
		return 1;
	}
	req->offset += ret;

	return -EAGAIN;
}

static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
	struct nvme_tcp_queue *queue = req->queue;
	size_t offset = req->offset;
	u32 h2cdata_left = req->h2cdata_left;
	int ret;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = {
		.iov_base = (u8 *)&req->ddgst + req->offset,
		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
	};

	if (nvme_tcp_queue_more(queue))
		msg.msg_flags |= MSG_MORE;
	else
		msg.msg_flags |= MSG_EOR;

	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (unlikely(ret <= 0))
		return ret;

	if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
		if (h2cdata_left)
			nvme_tcp_setup_h2c_data_pdu(req);
		else
			nvme_tcp_done_send_req(queue);
		return 1;
	}

	req->offset += ret;
	return -EAGAIN;
}

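/*
 * Top-level send state machine, advancing the current request through
 * CMD_PDU -> (H2C_PDU) -> DATA -> DDGST as each stage completes.
 * Returns 1 when progress was made, 0 when there is nothing (more) to
 * send, and a negative errno on a fatal send error (-EAGAIN is folded
 * into 0).
 */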
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_request *req;
	unsigned int noreclaim_flag;
	int ret = 1;

	if (!queue->request) {
		queue->request = nvme_tcp_fetch_request(queue);
		if (!queue->request)
			return 0;
	}
	req = queue->request;

	noreclaim_flag = memalloc_noreclaim_save();
	if (req->state == NVME_TCP_SEND_CMD_PDU) {
		ret = nvme_tcp_try_send_cmd_pdu(req);
		if (ret <= 0)
			goto done;
		if (!nvme_tcp_has_inline_data(req))
			goto out;
	}

	if (req->state == NVME_TCP_SEND_H2C_PDU) {
		ret = nvme_tcp_try_send_data_pdu(req);
		if (ret <= 0)
			goto done;
	}

	if (req->state == NVME_TCP_SEND_DATA) {
		ret = nvme_tcp_try_send_data(req);
		if (ret <= 0)
			goto done;
	}

	if (req->state == NVME_TCP_SEND_DDGST)
		ret = nvme_tcp_try_send_ddgst(req);
done:
	if (ret == -EAGAIN) {
		ret = 0;
	} else if (ret < 0) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to send request %d\n", ret);
		nvme_tcp_fail_request(queue->request);
		nvme_tcp_done_send_req(queue);
	}
out:
	memalloc_noreclaim_restore(noreclaim_flag);
	return ret;
}

static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
{
	struct socket *sock = queue->sock;
	struct sock *sk = sock->sk;
	read_descriptor_t rd_desc;
	int consumed;

	rd_desc.arg.data = queue;
	rd_desc.count = 1;
	lock_sock(sk);
	queue->nr_cqe = 0;
	consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
	release_sock(sk);
	return consumed;
}

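/*
 * Main io context: alternate between sending and receiving for a
 * roughly 1ms budget, then self-requeue if work is still pending so
 * other queues sharing nvme_tcp_wq get to run.
 */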
static void nvme_tcp_io_work(struct work_struct *w)
{
	struct nvme_tcp_queue *queue =
		container_of(w, struct nvme_tcp_queue, io_work);
	unsigned long deadline = jiffies + msecs_to_jiffies(1);

	do {
		bool pending = false;
		int result;

		if (mutex_trylock(&queue->send_mutex)) {
			result = nvme_tcp_try_send(queue);
			mutex_unlock(&queue->send_mutex);
			if (result > 0)
				pending = true;
			else if (unlikely(result < 0))
				break;
		}

		result = nvme_tcp_try_recv(queue);
		if (result > 0)
			pending = true;
		else if (unlikely(result < 0))
			return;

		if (!pending || !queue->rd_enabled)
			return;

	} while (!time_after(jiffies, deadline)); /* quota is exhausted */

	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}

static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);

	ahash_request_free(queue->rcv_hash);
	ahash_request_free(queue->snd_hash);
	crypto_free_ahash(tfm);
}

static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
{
	struct crypto_ahash *tfm;

	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->snd_hash)
		goto free_tfm;
	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);

	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->rcv_hash)
		goto free_snd_hash;
	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);

	return 0;
free_snd_hash:
	ahash_request_free(queue->snd_hash);
free_tfm:
	crypto_free_ahash(tfm);
	return -ENOMEM;
}

static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_request *async = &ctrl->async_req;

	page_frag_free(async->pdu);
}

static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
{
	struct nvme_tcp_queue *queue = &ctrl->queues[0];
	struct nvme_tcp_request *async = &ctrl->async_req;
	u8 hdgst = nvme_tcp_hdgst_len(queue);

	async->pdu = page_frag_alloc(&queue->pf_cache,
		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
		GFP_KERNEL | __GFP_ZERO);
	if (!async->pdu)
		return -ENOMEM;

	async->queue = &ctrl->queues[0];
	return 0;
}

static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
{
	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
	unsigned int noreclaim_flag;

	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
		return;

	if (queue->hdr_digest || queue->data_digest)
		nvme_tcp_free_crypto(queue);

	page_frag_cache_drain(&queue->pf_cache);

	noreclaim_flag = memalloc_noreclaim_save();
	/* ->sock will be released by fput() */
	fput(queue->sock->file);
	queue->sock = NULL;
	memalloc_noreclaim_restore(noreclaim_flag);

	kfree(queue->pdu);
	mutex_destroy(&queue->send_mutex);
	mutex_destroy(&queue->queue_lock);
}

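/*
 * Exchange the NVMe/TCP Initialize Connection Request/Response
 * (ICReq/ICResp) PDUs on a newly connected socket and validate that
 * both sides agree on the PFV, digest settings, CPDA and a sane
 * MAXH2CDATA value.
 */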
static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_icreq_pdu *icreq;
	struct nvme_tcp_icresp_pdu *icresp;
	char cbuf[CMSG_LEN(sizeof(char))] = {};
	u8 ctype;
	struct msghdr msg = {};
	struct kvec iov;
	bool ctrl_hdgst, ctrl_ddgst;
	u32 maxh2cdata;
	int ret;

	icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
	if (!icreq)
		return -ENOMEM;

	icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
	if (!icresp) {
		ret = -ENOMEM;
		goto free_icreq;
	}

	icreq->hdr.type = nvme_tcp_icreq;
	icreq->hdr.hlen = sizeof(*icreq);
	icreq->hdr.pdo = 0;
	icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
	icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
	icreq->maxr2t = 0; /* single inflight r2t supported */
	icreq->hpda = 0; /* no alignment constraint */
	if (queue->hdr_digest)
		icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
	if (queue->data_digest)
		icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;

	iov.iov_base = icreq;
	iov.iov_len = sizeof(*icreq);
	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (ret < 0) {
		pr_warn("queue %d: failed to send icreq, error %d\n",
			nvme_tcp_queue_id(queue), ret);
		goto free_icresp;
	}

	memset(&msg, 0, sizeof(msg));
	iov.iov_base = icresp;
	iov.iov_len = sizeof(*icresp);
	if (nvme_tcp_queue_tls(queue)) {
		msg.msg_control = cbuf;
		msg.msg_controllen = sizeof(cbuf);
	}
	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
			iov.iov_len, msg.msg_flags);
	if (ret < 0) {
		pr_warn("queue %d: failed to receive icresp, error %d\n",
			nvme_tcp_queue_id(queue), ret);
		goto free_icresp;
	}
	ret = -ENOTCONN;
	if (nvme_tcp_queue_tls(queue)) {
		ctype = tls_get_record_type(queue->sock->sk,
					    (struct cmsghdr *)cbuf);
		if (ctype != TLS_RECORD_TYPE_DATA) {
			pr_err("queue %d: unhandled TLS record %d\n",
			       nvme_tcp_queue_id(queue), ctype);
			goto free_icresp;
		}
	}
	ret = -EINVAL;
	if (icresp->hdr.type != nvme_tcp_icresp) {
		pr_err("queue %d: bad type returned %d\n",
			nvme_tcp_queue_id(queue), icresp->hdr.type);
		goto free_icresp;
	}

	if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
		pr_err("queue %d: bad pdu length returned %d\n",
			nvme_tcp_queue_id(queue), icresp->hdr.plen);
		goto free_icresp;
	}

	if (icresp->pfv != NVME_TCP_PFV_1_0) {
		pr_err("queue %d: bad pfv returned %d\n",
			nvme_tcp_queue_id(queue), icresp->pfv);
		goto free_icresp;
	}

	ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
	if ((queue->data_digest && !ctrl_ddgst) ||
	    (!queue->data_digest && ctrl_ddgst)) {
		pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
			nvme_tcp_queue_id(queue),
			queue->data_digest ? "enabled" : "disabled",
			ctrl_ddgst ? "enabled" : "disabled");
		goto free_icresp;
	}

	ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
	if ((queue->hdr_digest && !ctrl_hdgst) ||
	    (!queue->hdr_digest && ctrl_hdgst)) {
		pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
			nvme_tcp_queue_id(queue),
			queue->hdr_digest ? "enabled" : "disabled",
			ctrl_hdgst ? "enabled" : "disabled");
		goto free_icresp;
	}

	if (icresp->cpda != 0) {
		pr_err("queue %d: unsupported cpda returned %d\n",
			nvme_tcp_queue_id(queue), icresp->cpda);
		goto free_icresp;
	}

	maxh2cdata = le32_to_cpu(icresp->maxdata);
	if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
		pr_err("queue %d: invalid maxh2cdata returned %u\n",
		       nvme_tcp_queue_id(queue), maxh2cdata);
		goto free_icresp;
	}
	queue->maxh2cdata = maxh2cdata;

	ret = 0;
free_icresp:
	kfree(icresp);
free_icreq:
	kfree(icreq);
	return ret;
}

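/*
 * Queue classification: qid 0 is the admin queue; the I/O queues
 * follow in default (write), read, poll order, sized by
 * ctrl->io_queues[]. These helpers map a qid into its HCTX type range.
 */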
40510a63 SG |
1527 | static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) |
1528 | { | |
1529 | return nvme_tcp_queue_id(queue) == 0; | |
1530 | } | |
1531 | ||
1532 | static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) | |
1533 | { | |
1534 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1535 | int qid = nvme_tcp_queue_id(queue); | |
1536 | ||
1537 | return !nvme_tcp_admin_queue(queue) && | |
1538 | qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; | |
1539 | } | |
1540 | ||
1541 | static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) | |
1542 | { | |
1543 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1544 | int qid = nvme_tcp_queue_id(queue); | |
1545 | ||
1546 | return !nvme_tcp_admin_queue(queue) && | |
1547 | !nvme_tcp_default_queue(queue) && | |
1548 | qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + | |
1549 | ctrl->io_queues[HCTX_TYPE_READ]; | |
1550 | } | |
1551 | ||
1552 | static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) | |
1553 | { | |
1554 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1555 | int qid = nvme_tcp_queue_id(queue); | |
1556 | ||
1557 | return !nvme_tcp_admin_queue(queue) && | |
1558 | !nvme_tcp_default_queue(queue) && | |
1559 | !nvme_tcp_read_queue(queue) && | |
1560 | qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + | |
1561 | ctrl->io_queues[HCTX_TYPE_READ] + | |
1562 | ctrl->io_queues[HCTX_TYPE_POLL]; | |
1563 | } | |
1564 | ||
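/*
 * Pin the queue's io_work to a CPU: derive the queue's index n within
 * its class (default/read/poll) and wrap it onto the online CPU mask,
 * unless wq_unbound was set, in which case the workqueue may run
 * io_work on any CPU (WORK_CPU_UNBOUND).
 */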
1565 | static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) | |
1566 | { | |
1567 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1568 | int qid = nvme_tcp_queue_id(queue); | |
1569 | int n = 0; | |
1570 | ||
1571 | if (nvme_tcp_default_queue(queue)) | |
1572 | n = qid - 1; | |
1573 | else if (nvme_tcp_read_queue(queue)) | |
1574 | n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; | |
1575 | else if (nvme_tcp_poll_queue(queue)) | |
1576 | n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - | |
1577 | ctrl->io_queues[HCTX_TYPE_READ] - 1; | |
0c29f9fa LF |
1578 | if (wq_unbound) |
1579 | queue->io_cpu = WORK_CPU_UNBOUND; | |
1580 | else | |
1581 | queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); | |
40510a63 SG |
1582 | } |
1583 | ||
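/*
 * Completion callback for the TLS handshake upcall: record the
 * negotiated PSK (cached in the controller for the admin queue) or the
 * error, then wake up the waiter in nvme_tcp_start_tls().
 */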
be8e82ca HR |
1584 | static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid) |
1585 | { | |
1586 | struct nvme_tcp_queue *queue = data; | |
1587 | struct nvme_tcp_ctrl *ctrl = queue->ctrl; | |
1588 | int qid = nvme_tcp_queue_id(queue); | |
1589 | struct key *tls_key; | |
1590 | ||
1591 | dev_dbg(ctrl->ctrl.device, "queue %d: TLS handshake done, key %x, status %d\n", | |
1592 | qid, pskid, status); | |
1593 | ||
1594 | if (status) { | |
1595 | queue->tls_err = -status; | |
1596 | goto out_complete; | |
1597 | } | |
1598 | ||
5bc46b49 | 1599 | tls_key = nvme_tls_key_lookup(pskid); |
be8e82ca HR |
1600 | if (IS_ERR(tls_key)) { |
1601 | dev_warn(ctrl->ctrl.device, "queue %d: Invalid key %x\n", | |
1602 | qid, pskid); | |
1603 | queue->tls_err = -ENOKEY; | |
1604 | } else { | |
36389576 HR |
1605 | queue->tls_enabled = true; |
1606 | if (qid == 0) | |
1607 | ctrl->ctrl.tls_pskid = key_serial(tls_key); | |
1608 | key_put(tls_key); | |
be8e82ca HR |
1609 | queue->tls_err = 0; |
1610 | } | |
1611 | ||
1612 | out_complete: | |
1613 | complete(&queue->tls_complete); | |
1614 | } | |
1615 | ||
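/*
 * Hand the socket to the kernel handshake upcall with
 * tls_client_hello_psk(); the handshake itself is performed by a
 * userspace agent (typically tlshd). Wait for nvme_tcp_tls_done() and
 * cancel the handshake if it does not finish within
 * tls_handshake_timeout.
 */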
1616 | static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl, | |
1617 | struct nvme_tcp_queue *queue, | |
1618 | key_serial_t pskid) | |
1619 | { | |
1620 | int qid = nvme_tcp_queue_id(queue); | |
1621 | int ret; | |
1622 | struct tls_handshake_args args; | |
1623 | unsigned long tmo = tls_handshake_timeout * HZ; | |
1624 | key_serial_t keyring = nvme_keyring_id(); | |
1625 | ||
1626 | dev_dbg(nctrl->device, "queue %d: start TLS with key %x\n", | |
1627 | qid, pskid); | |
1628 | memset(&args, 0, sizeof(args)); | |
1629 | args.ta_sock = queue->sock; | |
1630 | args.ta_done = nvme_tcp_tls_done; | |
1631 | args.ta_data = queue; | |
1632 | args.ta_my_peerids[0] = pskid; | |
1633 | args.ta_num_peerids = 1; | |
adf22c52 HR |
1634 | if (nctrl->opts->keyring) |
1635 | keyring = key_serial(nctrl->opts->keyring); | |
be8e82ca HR |
1636 | args.ta_keyring = keyring; |
1637 | args.ta_timeout_ms = tls_handshake_timeout * 1000; | |
1638 | queue->tls_err = -EOPNOTSUPP; | |
1639 | init_completion(&queue->tls_complete); | |
1640 | ret = tls_client_hello_psk(&args, GFP_KERNEL); | |
1641 | if (ret) { | |
1642 | dev_err(nctrl->device, "queue %d: failed to start TLS: %d\n", | |
1643 | qid, ret); | |
1644 | return ret; | |
1645 | } | |
1646 | ret = wait_for_completion_interruptible_timeout(&queue->tls_complete, tmo); | |
1647 | if (ret <= 0) { | |
1648 | if (ret == 0) | |
1649 | ret = -ETIMEDOUT; | |
1650 | ||
1651 | dev_err(nctrl->device, | |
1652 | "queue %d: TLS handshake failed, error %d\n", | |
1653 | qid, ret); | |
1654 | tls_handshake_cancel(queue->sock->sk); | |
1655 | } else { | |
1656 | dev_dbg(nctrl->device, | |
1657 | "queue %d: TLS handshake complete, error %d\n", | |
1658 | qid, queue->tls_err); | |
1659 | ret = queue->tls_err; | |
1660 | } | |
1661 | return ret; | |
1662 | } | |
be8e82ca HR |
1663 | |
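/*
 * Create and connect a single queue: allocate the socket, tune its
 * options, bind to the optional source address/interface, connect to
 * the target, run the TLS handshake if required, and finally exchange
 * ICReq/ICResp.
 */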
1664 | static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, | |
1665 | key_serial_t pskid) | |
3f2304f8 SG |
1666 | { |
1667 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
1668 | struct nvme_tcp_queue *queue = &ctrl->queues[qid]; | |
6ebf71ba | 1669 | int ret, rcv_pdu_size; |
e40d4eb8 | 1670 | struct file *sock_file; |
3f2304f8 | 1671 | |
9ebbfe49 | 1672 | mutex_init(&queue->queue_lock); |
3f2304f8 | 1673 | queue->ctrl = ctrl; |
15ec928a | 1674 | init_llist_head(&queue->req_list); |
3f2304f8 | 1675 | INIT_LIST_HEAD(&queue->send_list); |
db5ad6b7 | 1676 | mutex_init(&queue->send_mutex); |
3f2304f8 | 1677 | INIT_WORK(&queue->io_work, nvme_tcp_io_work); |
3f2304f8 SG |
1678 | |
1679 | if (qid > 0) | |
9924b030 | 1680 | queue->cmnd_capsule_len = nctrl->ioccsz * 16; |
3f2304f8 SG |
1681 | else |
1682 | queue->cmnd_capsule_len = sizeof(struct nvme_command) + | |
1683 | NVME_TCP_ADMIN_CCSZ; | |
1684 | ||
1685 | ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM, | |
1686 | IPPROTO_TCP, &queue->sock); | |
1687 | if (ret) { | |
9924b030 | 1688 | dev_err(nctrl->device, |
3f2304f8 | 1689 | "failed to create socket: %d\n", ret); |
9ebbfe49 | 1690 | goto err_destroy_mutex; |
3f2304f8 SG |
1691 | } |
1692 | ||
e40d4eb8 HR |
1693 | sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL); |
1694 | if (IS_ERR(sock_file)) { | |
1695 | ret = PTR_ERR(sock_file); | |
1696 | goto err_destroy_mutex; | |
1697 | } | |
841aee4d CL |
1698 | nvme_tcp_reclassify_socket(queue->sock); |
1699 | ||
3f2304f8 | 1700 | /* Allow only a single SYN retry */ |
557eadfc | 1701 | tcp_sock_set_syncnt(queue->sock->sk, 1); |
3f2304f8 SG |
1702 | |
1703 | /* Set TCP no delay */ | |
12abc5ee | 1704 | tcp_sock_set_nodelay(queue->sock->sk); |
3f2304f8 SG |
1705 | |
1706 | /* | |
1707 | * Cleanup whatever is sitting in the TCP transmit queue on socket | |
1708 | * close. This is done to prevent stale data from being sent should | |
1709 | * the network connection be restored before TCP times out. | |
1710 | */ | |
c433594c | 1711 | sock_no_linger(queue->sock->sk); |
3f2304f8 | 1712 | |
6e434967 CH |
1713 | if (so_priority > 0) |
1714 | sock_set_priority(queue->sock->sk, so_priority); | |
9912ade3 | 1715 | |
bb13985d | 1716 | /* Set socket type of service */ |
6ebf71ba CH |
1717 | if (nctrl->opts->tos >= 0) |
1718 | ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos); | |
bb13985d | 1719 | |
adc99fd3 SG |
1720 | /* Set a 10-second timeout for the icresp recvmsg */ | |
1721 | queue->sock->sk->sk_rcvtimeo = 10 * HZ; | |
1722 | ||
3f2304f8 | 1723 | queue->sock->sk->sk_allocation = GFP_ATOMIC; |
98123866 | 1724 | queue->sock->sk->sk_use_task_frag = false; |
40510a63 | 1725 | nvme_tcp_set_queue_io_cpu(queue); |
3f2304f8 SG |
1726 | queue->request = NULL; |
1727 | queue->data_remaining = 0; | |
1728 | queue->ddgst_remaining = 0; | |
1729 | queue->pdu_remaining = 0; | |
1730 | queue->pdu_offset = 0; | |
1731 | sk_set_memalloc(queue->sock->sk); | |
1732 | ||
9924b030 | 1733 | if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) { |
3f2304f8 SG |
1734 | ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr, |
1735 | sizeof(ctrl->src_addr)); | |
1736 | if (ret) { | |
9924b030 | 1737 | dev_err(nctrl->device, |
3f2304f8 SG |
1738 | "failed to bind queue %d socket %d\n", |
1739 | qid, ret); | |
1740 | goto err_sock; | |
1741 | } | |
1742 | } | |
1743 | ||
3ede8f72 MB |
1744 | if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) { |
1745 | char *iface = nctrl->opts->host_iface; | |
1746 | sockptr_t optval = KERNEL_SOCKPTR(iface); | |
1747 | ||
1748 | ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE, | |
1749 | optval, strlen(iface)); | |
1750 | if (ret) { | |
1751 | dev_err(nctrl->device, | |
1752 | "failed to bind to interface %s queue %d err %d\n", | |
1753 | iface, qid, ret); | |
1754 | goto err_sock; | |
1755 | } | |
1756 | } | |
1757 | ||
3f2304f8 SG |
1758 | queue->hdr_digest = nctrl->opts->hdr_digest; |
1759 | queue->data_digest = nctrl->opts->data_digest; | |
1760 | if (queue->hdr_digest || queue->data_digest) { | |
1761 | ret = nvme_tcp_alloc_crypto(queue); | |
1762 | if (ret) { | |
9924b030 | 1763 | dev_err(nctrl->device, |
3f2304f8 SG |
1764 | "failed to allocate queue %d crypto\n", qid); |
1765 | goto err_sock; | |
1766 | } | |
1767 | } | |
1768 | ||
1769 | rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) + | |
1770 | nvme_tcp_hdgst_len(queue); | |
1771 | queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL); | |
1772 | if (!queue->pdu) { | |
1773 | ret = -ENOMEM; | |
1774 | goto err_crypto; | |
1775 | } | |
1776 | ||
9924b030 | 1777 | dev_dbg(nctrl->device, "connecting queue %d\n", |
3f2304f8 SG |
1778 | nvme_tcp_queue_id(queue)); |
1779 | ||
1780 | ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr, | |
1781 | sizeof(ctrl->addr), 0); | |
1782 | if (ret) { | |
9924b030 | 1783 | dev_err(nctrl->device, |
3f2304f8 SG |
1784 | "failed to connect socket: %d\n", ret); |
1785 | goto err_rcv_pdu; | |
1786 | } | |
1787 | ||
be8e82ca | 1788 | /* If PSKs are configured, try to start TLS */ |
36389576 | 1789 | if (nvme_tcp_tls_configured(nctrl) && pskid) { |
be8e82ca HR |
1790 | ret = nvme_tcp_start_tls(nctrl, queue, pskid); |
1791 | if (ret) | |
1792 | goto err_init_connect; | |
1793 | } | |
1794 | ||
3f2304f8 SG |
1795 | ret = nvme_tcp_init_connection(queue); |
1796 | if (ret) | |
1797 | goto err_init_connect; | |
1798 | ||
3f2304f8 | 1799 | set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); |
3f2304f8 SG |
1800 | |
1801 | return 0; | |
1802 | ||
1803 | err_init_connect: | |
1804 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
1805 | err_rcv_pdu: | |
1806 | kfree(queue->pdu); | |
1807 | err_crypto: | |
1808 | if (queue->hdr_digest || queue->data_digest) | |
1809 | nvme_tcp_free_crypto(queue); | |
1810 | err_sock: | |
e40d4eb8 HR |
1811 | /* ->sock will be released by fput() */ |
1812 | fput(queue->sock->file); | |
3f2304f8 | 1813 | queue->sock = NULL; |
9ebbfe49 | 1814 | err_destroy_mutex: |
d48f92cd | 1815 | mutex_destroy(&queue->send_mutex); |
9ebbfe49 | 1816 | mutex_destroy(&queue->queue_lock); |
3f2304f8 SG |
1817 | return ret; |
1818 | } | |
1819 | ||
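/*
 * Detach the queue from its socket by restoring the original sk
 * callbacks saved in nvme_tcp_setup_sock_ops().
 */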
88eaba80 | 1820 | static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue) |
3f2304f8 SG |
1821 | { |
1822 | struct socket *sock = queue->sock; | |
1823 | ||
1824 | write_lock_bh(&sock->sk->sk_callback_lock); | |
1825 | sock->sk->sk_user_data = NULL; | |
1826 | sock->sk->sk_data_ready = queue->data_ready; | |
1827 | sock->sk->sk_state_change = queue->state_change; | |
1828 | sock->sk->sk_write_space = queue->write_space; | |
1829 | write_unlock_bh(&sock->sk->sk_callback_lock); | |
1830 | } | |
1831 | ||
1832 | static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) | |
1833 | { | |
1834 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
88eaba80 | 1835 | nvme_tcp_restore_sock_ops(queue); |
3f2304f8 SG |
1836 | cancel_work_sync(&queue->io_work); |
1837 | } | |
1838 | ||
1839 | static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) | |
1840 | { | |
1841 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
1842 | struct nvme_tcp_queue *queue = &ctrl->queues[qid]; | |
1843 | ||
2bff487f ML |
1844 | if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) |
1845 | return; | |
1846 | ||
9ebbfe49 CL |
1847 | mutex_lock(&queue->queue_lock); |
1848 | if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) | |
1849 | __nvme_tcp_stop_queue(queue); | |
36389576 HR |
1850 | /* Stopping the queue will disable TLS */ |
1851 | queue->tls_enabled = false; | |
9ebbfe49 | 1852 | mutex_unlock(&queue->queue_lock); |
3f2304f8 SG |
1853 | } |
1854 | ||
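/*
 * Hook the queue into its socket: save the original sk callbacks and
 * install the nvme-tcp data_ready/state_change/write_space handlers so
 * that socket events drive io_work; also enable busy polling.
 */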
88eaba80 SG |
1855 | static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) |
1856 | { | |
1857 | write_lock_bh(&queue->sock->sk->sk_callback_lock); | |
1858 | queue->sock->sk->sk_user_data = queue; | |
1859 | queue->state_change = queue->sock->sk->sk_state_change; | |
1860 | queue->data_ready = queue->sock->sk->sk_data_ready; | |
1861 | queue->write_space = queue->sock->sk->sk_write_space; | |
1862 | queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; | |
1863 | queue->sock->sk->sk_state_change = nvme_tcp_state_change; | |
1864 | queue->sock->sk->sk_write_space = nvme_tcp_write_space; | |
1865 | #ifdef CONFIG_NET_RX_BUSY_POLL | |
1866 | queue->sock->sk->sk_ll_usec = 1; | |
1867 | #endif | |
1868 | write_unlock_bh(&queue->sock->sk->sk_callback_lock); | |
1869 | } | |
1870 | ||
3f2304f8 SG |
1871 | static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) |
1872 | { | |
1873 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
88eaba80 | 1874 | struct nvme_tcp_queue *queue = &ctrl->queues[idx]; |
3f2304f8 SG |
1875 | int ret; |
1876 | ||
88eaba80 SG |
1877 | queue->rd_enabled = true; |
1878 | nvme_tcp_init_recv_ctx(queue); | |
1879 | nvme_tcp_setup_sock_ops(queue); | |
1880 | ||
3f2304f8 | 1881 | if (idx) |
be42a33b | 1882 | ret = nvmf_connect_io_queue(nctrl, idx); |
3f2304f8 SG |
1883 | else |
1884 | ret = nvmf_connect_admin_queue(nctrl); | |
1885 | ||
1886 | if (!ret) { | |
88eaba80 | 1887 | set_bit(NVME_TCP_Q_LIVE, &queue->flags); |
3f2304f8 | 1888 | } else { |
88eaba80 SG |
1889 | if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) |
1890 | __nvme_tcp_stop_queue(queue); | |
3f2304f8 SG |
1891 | dev_err(nctrl->device, |
1892 | "failed to connect queue: %d ret=%d\n", idx, ret); | |
1893 | } | |
1894 | return ret; | |
1895 | } | |
1896 | ||
3f2304f8 SG |
1897 | static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) |
1898 | { | |
1899 | if (to_tcp_ctrl(ctrl)->async_req.pdu) { | |
ceb1e087 | 1900 | cancel_work_sync(&ctrl->async_event_work); |
3f2304f8 SG |
1901 | nvme_tcp_free_async_req(to_tcp_ctrl(ctrl)); |
1902 | to_tcp_ctrl(ctrl)->async_req.pdu = NULL; | |
1903 | } | |
1904 | ||
1905 | nvme_tcp_free_queue(ctrl, 0); | |
1906 | } | |
1907 | ||
1908 | static void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl) | |
1909 | { | |
1910 | int i; | |
1911 | ||
1912 | for (i = 1; i < ctrl->queue_count; i++) | |
1913 | nvme_tcp_free_queue(ctrl, i); | |
1914 | } | |
1915 | ||
1916 | static void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) | |
1917 | { | |
1918 | int i; | |
1919 | ||
1920 | for (i = 1; i < ctrl->queue_count; i++) | |
1921 | nvme_tcp_stop_queue(ctrl, i); | |
1922 | } | |
1923 | ||
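/*
 * Start (connect) the I/O queues in the half-open range [first, last),
 * unwinding any queues already started on failure. Callers use the
 * range to start queues in two batches when the queue count grew on a
 * reconnect.
 */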
09035f86 DW |
1924 | static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, |
1925 | int first, int last) | |
3f2304f8 | 1926 | { |
462b8b2d | 1927 | int i, ret; |
3f2304f8 | 1928 | |
09035f86 | 1929 | for (i = first; i < last; i++) { |
3f2304f8 SG |
1930 | ret = nvme_tcp_start_queue(ctrl, i); |
1931 | if (ret) | |
1932 | goto out_stop_queues; | |
1933 | } | |
1934 | ||
1935 | return 0; | |
1936 | ||
1937 | out_stop_queues: | |
09035f86 | 1938 | for (i--; i >= first; i--) |
3f2304f8 SG |
1939 | nvme_tcp_stop_queue(ctrl, i); |
1940 | return ret; | |
1941 | } | |
1942 | ||
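/*
 * For the admin queue, prefer an explicitly configured TLS key and
 * otherwise look up the default PSK derived from the host and subsystem
 * NQNs; the I/O queues later reuse the PSK negotiated here.
 */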
1943 | static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) | |
1944 | { | |
1945 | int ret; | |
be8e82ca HR |
1946 | key_serial_t pskid = 0; |
1947 | ||
36389576 | 1948 | if (nvme_tcp_tls_configured(ctrl)) { |
adf22c52 HR |
1949 | if (ctrl->opts->tls_key) |
1950 | pskid = key_serial(ctrl->opts->tls_key); | |
36389576 | 1951 | else { |
adf22c52 HR |
1952 | pskid = nvme_tls_psk_default(ctrl->opts->keyring, |
1953 | ctrl->opts->host->nqn, | |
1954 | ctrl->opts->subsysnqn); | |
36389576 HR |
1955 | if (!pskid) { |
1956 | dev_err(ctrl->device, "no valid PSK found\n"); | |
1957 | return -ENOKEY; | |
1958 | } | |
be8e82ca HR |
1959 | } |
1960 | } | |
3f2304f8 | 1961 | |
be8e82ca | 1962 | ret = nvme_tcp_alloc_queue(ctrl, 0, pskid); |
3f2304f8 | 1963 | if (ret) |
ef184b88 | 1964 | return ret; |
3f2304f8 SG |
1965 | |
1966 | ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); | |
1967 | if (ret) | |
1968 | goto out_free_queue; | |
1969 | ||
1970 | return 0; | |
1971 | ||
1972 | out_free_queue: | |
1973 | nvme_tcp_free_queue(ctrl, 0); | |
1974 | return ret; | |
1975 | } | |
1976 | ||
efb973b1 | 1977 | static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) |
3f2304f8 SG |
1978 | { |
1979 | int i, ret; | |
1980 | ||
36389576 | 1981 | if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) { |
be8e82ca HR |
1982 | dev_err(ctrl->device, "no PSK negotiated\n"); |
1983 | return -ENOKEY; | |
1984 | } | |
36389576 | 1985 | |
3f2304f8 | 1986 | for (i = 1; i < ctrl->queue_count; i++) { |
be8e82ca | 1987 | ret = nvme_tcp_alloc_queue(ctrl, i, |
36389576 | 1988 | ctrl->tls_pskid); |
3f2304f8 SG |
1989 | if (ret) |
1990 | goto out_free_queues; | |
1991 | } | |
1992 | ||
1993 | return 0; | |
1994 | ||
1995 | out_free_queues: | |
1996 | for (i--; i >= 1; i--) | |
1997 | nvme_tcp_free_queue(ctrl, i); | |
1998 | ||
1999 | return ret; | |
2000 | } | |
2001 | ||
efb973b1 | 2002 | static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) |
3f2304f8 SG |
2003 | { |
2004 | unsigned int nr_io_queues; | |
2005 | int ret; | |
2006 | ||
a249d306 | 2007 | nr_io_queues = nvmf_nr_io_queues(ctrl->opts); |
3f2304f8 SG |
2008 | ret = nvme_set_queue_count(ctrl, &nr_io_queues); |
2009 | if (ret) | |
2010 | return ret; | |
2011 | ||
664227fd | 2012 | if (nr_io_queues == 0) { |
72f57242 SG |
2013 | dev_err(ctrl->device, |
2014 | "unable to set any I/O queues\n"); | |
2015 | return -ENOMEM; | |
2016 | } | |
3f2304f8 | 2017 | |
664227fd | 2018 | ctrl->queue_count = nr_io_queues + 1; |
3f2304f8 SG |
2019 | dev_info(ctrl->device, |
2020 | "creating %d I/O queues.\n", nr_io_queues); | |
2021 | ||
a249d306 KB |
2022 | nvmf_set_io_queues(ctrl->opts, nr_io_queues, |
2023 | to_tcp_ctrl(ctrl)->io_queues); | |
efb973b1 | 2024 | return __nvme_tcp_alloc_io_queues(ctrl); |
3f2304f8 SG |
2025 | } |
2026 | ||
2027 | static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) | |
2028 | { | |
2029 | nvme_tcp_stop_io_queues(ctrl); | |
de777825 CH |
2030 | if (remove) |
2031 | nvme_remove_io_tag_set(ctrl); | |
3f2304f8 SG |
2032 | nvme_tcp_free_io_queues(ctrl); |
2033 | } | |
2034 | ||
2035 | static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) | |
2036 | { | |
09035f86 | 2037 | int ret, nr_queues; |
3f2304f8 | 2038 | |
efb973b1 | 2039 | ret = nvme_tcp_alloc_io_queues(ctrl); |
3f2304f8 SG |
2040 | if (ret) |
2041 | return ret; | |
2042 | ||
2043 | if (new) { | |
de777825 CH |
2044 | ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, |
2045 | &nvme_tcp_mq_ops, | |
dcef7727 | 2046 | ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, |
de777825 | 2047 | sizeof(struct nvme_tcp_request)); |
2f7a7e5d | 2048 | if (ret) |
3f2304f8 | 2049 | goto out_free_io_queues; |
3f2304f8 SG |
2050 | } |
2051 | ||
09035f86 DW |
2052 | /* |
2053 | * Only start IO queues for which we have allocated the tagset | |
2054 | * and limited it to the available queues. On reconnects, the | |
2055 | * number of queues might have changed. | |
2056 | */ | |
2057 | nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); | |
2058 | ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues); | |
3f2304f8 SG |
2059 | if (ret) |
2060 | goto out_cleanup_connect_q; | |
2061 | ||
2875b0ae | 2062 | if (!new) { |
99dc2640 | 2063 | nvme_start_freeze(ctrl); |
9f27bd70 | 2064 | nvme_unquiesce_io_queues(ctrl); |
e5c01f4f SG |
2065 | if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { |
2066 | /* | |
2067 | * If we timed out waiting for freeze we are likely to | |
2068 | * be stuck. Fail the controller initialization just | |
2069 | * to be safe. | |
2070 | */ | |
2071 | ret = -ENODEV; | |
99dc2640 | 2072 | nvme_unfreeze(ctrl); |
e5c01f4f SG |
2073 | goto out_wait_freeze_timed_out; |
2074 | } | |
2875b0ae SG |
2075 | blk_mq_update_nr_hw_queues(ctrl->tagset, |
2076 | ctrl->queue_count - 1); | |
2077 | nvme_unfreeze(ctrl); | |
2078 | } | |
2079 | ||
09035f86 DW |
2080 | /* |
2081 | * If the number of queues has increased (reconnect case) | |
2082 | * start all new queues now. | |
2083 | */ | |
2084 | ret = nvme_tcp_start_io_queues(ctrl, nr_queues, | |
2085 | ctrl->tagset->nr_hw_queues + 1); | |
2086 | if (ret) | |
2087 | goto out_wait_freeze_timed_out; | |
2088 | ||
3f2304f8 SG |
2089 | return 0; |
2090 | ||
e5c01f4f | 2091 | out_wait_freeze_timed_out: |
9f27bd70 | 2092 | nvme_quiesce_io_queues(ctrl); |
70a99574 | 2093 | nvme_sync_io_queues(ctrl); |
e5c01f4f | 2094 | nvme_tcp_stop_io_queues(ctrl); |
3f2304f8 | 2095 | out_cleanup_connect_q: |
70a99574 | 2096 | nvme_cancel_tagset(ctrl); |
e85037a2 | 2097 | if (new) |
de777825 | 2098 | nvme_remove_io_tag_set(ctrl); |
3f2304f8 SG |
2099 | out_free_io_queues: |
2100 | nvme_tcp_free_io_queues(ctrl); | |
2101 | return ret; | |
2102 | } | |
2103 | ||
2104 | static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) | |
2105 | { | |
2106 | nvme_tcp_stop_queue(ctrl, 0); | |
de777825 CH |
2107 | if (remove) |
2108 | nvme_remove_admin_tag_set(ctrl); | |
3f2304f8 SG |
2109 | nvme_tcp_free_admin_queue(ctrl); |
2110 | } | |
2111 | ||
2112 | static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) | |
2113 | { | |
2114 | int error; | |
2115 | ||
2116 | error = nvme_tcp_alloc_admin_queue(ctrl); | |
2117 | if (error) | |
2118 | return error; | |
2119 | ||
2120 | if (new) { | |
de777825 CH |
2121 | error = nvme_alloc_admin_tag_set(ctrl, |
2122 | &to_tcp_ctrl(ctrl)->admin_tag_set, | |
db45e1a5 | 2123 | &nvme_tcp_admin_mq_ops, |
de777825 | 2124 | sizeof(struct nvme_tcp_request)); |
2f7a7e5d | 2125 | if (error) |
3f2304f8 | 2126 | goto out_free_queue; |
3f2304f8 SG |
2127 | } |
2128 | ||
2129 | error = nvme_tcp_start_queue(ctrl, 0); | |
2130 | if (error) | |
de777825 | 2131 | goto out_cleanup_tagset; |
3f2304f8 | 2132 | |
c0f2f45b | 2133 | error = nvme_enable_ctrl(ctrl); |
3f2304f8 SG |
2134 | if (error) |
2135 | goto out_stop_queue; | |
2136 | ||
9f27bd70 | 2137 | nvme_unquiesce_admin_queue(ctrl); |
e7832cb4 | 2138 | |
94cc781f | 2139 | error = nvme_init_ctrl_finish(ctrl, false); |
3f2304f8 | 2140 | if (error) |
70a99574 | 2141 | goto out_quiesce_queue; |
3f2304f8 SG |
2142 | |
2143 | return 0; | |
2144 | ||
70a99574 | 2145 | out_quiesce_queue: |
9f27bd70 | 2146 | nvme_quiesce_admin_queue(ctrl); |
70a99574 | 2147 | blk_sync_queue(ctrl->admin_q); |
3f2304f8 SG |
2148 | out_stop_queue: |
2149 | nvme_tcp_stop_queue(ctrl, 0); | |
70a99574 | 2150 | nvme_cancel_admin_tagset(ctrl); |
de777825 | 2151 | out_cleanup_tagset: |
e7832cb4 | 2152 | if (new) |
de777825 | 2153 | nvme_remove_admin_tag_set(ctrl); |
3f2304f8 SG |
2154 | out_free_queue: |
2155 | nvme_tcp_free_admin_queue(ctrl); | |
2156 | return error; | |
2157 | } | |
2158 | ||
2159 | static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, | |
2160 | bool remove) | |
2161 | { | |
9f27bd70 | 2162 | nvme_quiesce_admin_queue(ctrl); |
d6f66210 | 2163 | blk_sync_queue(ctrl->admin_q); |
3f2304f8 | 2164 | nvme_tcp_stop_queue(ctrl, 0); |
563c8158 | 2165 | nvme_cancel_admin_tagset(ctrl); |
e7832cb4 | 2166 | if (remove) |
9f27bd70 | 2167 | nvme_unquiesce_admin_queue(ctrl); |
3f2304f8 | 2168 | nvme_tcp_destroy_admin_queue(ctrl, remove); |
36389576 HR |
2169 | if (ctrl->tls_pskid) { |
2170 | dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n", | |
2171 | ctrl->tls_pskid); | |
2172 | ctrl->tls_pskid = 0; | |
2173 | } | |
3f2304f8 SG |
2174 | } |
2175 | ||
2176 | static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, | |
2177 | bool remove) | |
2178 | { | |
2179 | if (ctrl->queue_count <= 1) | |
d6f66210 | 2180 | return; |
9f27bd70 | 2181 | nvme_quiesce_admin_queue(ctrl); |
9f27bd70 | 2182 | nvme_quiesce_io_queues(ctrl); |
d6f66210 | 2183 | nvme_sync_io_queues(ctrl); |
3f2304f8 | 2184 | nvme_tcp_stop_io_queues(ctrl); |
563c8158 | 2185 | nvme_cancel_tagset(ctrl); |
3f2304f8 | 2186 | if (remove) |
9f27bd70 | 2187 | nvme_unquiesce_io_queues(ctrl); |
3f2304f8 SG |
2188 | nvme_tcp_destroy_io_queues(ctrl, remove); |
2189 | } | |
2190 | ||
adfde7ed HR |
2191 | static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, |
2192 | int status) | |
3f2304f8 | 2193 | { |
e6e7f7ac KB |
2194 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2195 | ||
3f2304f8 | 2196 | /* If we are resetting/deleting then do nothing */ |
e6e7f7ac KB |
2197 | if (state != NVME_CTRL_CONNECTING) { |
2198 | WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE); | |
3f2304f8 SG |
2199 | return; |
2200 | } | |
2201 | ||
adfde7ed | 2202 | if (nvmf_should_reconnect(ctrl, status)) { |
3f2304f8 SG |
2203 | dev_info(ctrl->device, "Reconnecting in %d seconds...\n", |
2204 | ctrl->opts->reconnect_delay); | |
2205 | queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, | |
2206 | ctrl->opts->reconnect_delay * HZ); | |
2207 | } else { | |
adfde7ed HR |
2208 | dev_info(ctrl->device, "Removing controller (%d)...\n", |
2209 | status); | |
3f2304f8 SG |
2210 | nvme_delete_ctrl(ctrl); |
2211 | } | |
2212 | } | |
2213 | ||
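/*
 * Bring up (or re-establish) a controller: admin queue first, then
 * capability validation and sqsize clamping, then the I/O queues, and
 * finally the transition to LIVE.
 */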
2214 | static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) | |
2215 | { | |
2216 | struct nvmf_ctrl_options *opts = ctrl->opts; | |
312910f4 | 2217 | int ret; |
3f2304f8 SG |
2218 | |
2219 | ret = nvme_tcp_configure_admin_queue(ctrl, new); | |
2220 | if (ret) | |
2221 | return ret; | |
2222 | ||
2223 | if (ctrl->icdoff) { | |
522af60c | 2224 | ret = -EOPNOTSUPP; |
3f2304f8 SG |
2225 | dev_err(ctrl->device, "icdoff is not supported!\n"); |
2226 | goto destroy_admin; | |
2227 | } | |
2228 | ||
3b54064f | 2229 | if (!nvme_ctrl_sgl_supported(ctrl)) { |
522af60c | 2230 | ret = -EOPNOTSUPP; |
73ffcefc MG |
2231 | dev_err(ctrl->device, "Mandatory sgls are not supported!\n"); |
2232 | goto destroy_admin; | |
2233 | } | |
2234 | ||
3f2304f8 SG |
2235 | if (opts->queue_size > ctrl->sqsize + 1) |
2236 | dev_warn(ctrl->device, | |
2237 | "queue_size %zu > ctrl sqsize %u, clamping down\n", | |
2238 | opts->queue_size, ctrl->sqsize + 1); | |
2239 | ||
2240 | if (ctrl->sqsize + 1 > ctrl->maxcmd) { | |
2241 | dev_warn(ctrl->device, | |
2242 | "sqsize %u > ctrl maxcmd %u, clamping down\n", | |
2243 | ctrl->sqsize + 1, ctrl->maxcmd); | |
2244 | ctrl->sqsize = ctrl->maxcmd - 1; | |
2245 | } | |
2246 | ||
2247 | if (ctrl->queue_count > 1) { | |
2248 | ret = nvme_tcp_configure_io_queues(ctrl, new); | |
2249 | if (ret) | |
2250 | goto destroy_admin; | |
2251 | } | |
2252 | ||
2253 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { | |
bea54ef5 | 2254 | /* |
ecca390e | 2255 | * state change failure is ok if we started ctrl delete, |
bea54ef5 IR |
2256 | * unless we're in the middle of creating a new controller, to | |
2257 | * avoid races with the teardown flow. | |
2258 | */ | |
e6e7f7ac KB |
2259 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2260 | ||
2261 | WARN_ON_ONCE(state != NVME_CTRL_DELETING && | |
2262 | state != NVME_CTRL_DELETING_NOIO); | |
bea54ef5 | 2263 | WARN_ON_ONCE(new); |
3f2304f8 SG |
2264 | ret = -EINVAL; |
2265 | goto destroy_io; | |
2266 | } | |
2267 | ||
2268 | nvme_start_ctrl(ctrl); | |
2269 | return 0; | |
2270 | ||
2271 | destroy_io: | |
70a99574 | 2272 | if (ctrl->queue_count > 1) { |
9f27bd70 | 2273 | nvme_quiesce_io_queues(ctrl); |
70a99574 CL |
2274 | nvme_sync_io_queues(ctrl); |
2275 | nvme_tcp_stop_io_queues(ctrl); | |
2276 | nvme_cancel_tagset(ctrl); | |
3f2304f8 | 2277 | nvme_tcp_destroy_io_queues(ctrl, new); |
70a99574 | 2278 | } |
3f2304f8 | 2279 | destroy_admin: |
3af755a4 | 2280 | nvme_stop_keep_alive(ctrl); |
fd1418de | 2281 | nvme_tcp_teardown_admin_queue(ctrl, false); |
3f2304f8 SG |
2282 | return ret; |
2283 | } | |
2284 | ||
2285 | static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) | |
2286 | { | |
2287 | struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), | |
2288 | struct nvme_tcp_ctrl, connect_work); | |
2289 | struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; | |
adfde7ed | 2290 | int ret; |
3f2304f8 SG |
2291 | |
2292 | ++ctrl->nr_reconnects; | |
2293 | ||
adfde7ed HR |
2294 | ret = nvme_tcp_setup_ctrl(ctrl, false); |
2295 | if (ret) | |
3f2304f8 SG |
2296 | goto requeue; |
2297 | ||
54a76c87 TI |
2298 | dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n", |
2299 | ctrl->nr_reconnects, ctrl->opts->max_reconnects); | |
3f2304f8 SG |
2300 | |
2301 | ctrl->nr_reconnects = 0; | |
2302 | ||
2303 | return; | |
2304 | ||
2305 | requeue: | |
54a76c87 TI |
2306 | dev_info(ctrl->device, "Failed reconnect attempt %d/%d\n", |
2307 | ctrl->nr_reconnects, ctrl->opts->max_reconnects); | |
adfde7ed | 2308 | nvme_tcp_reconnect_or_remove(ctrl, ret); |
3f2304f8 SG |
2309 | } |
2310 | ||
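/*
 * Error recovery: tear down all queues, failing fast any pending
 * requests, then move the controller to CONNECTING and schedule a
 * reconnect.
 */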
2311 | static void nvme_tcp_error_recovery_work(struct work_struct *work) | |
2312 | { | |
2313 | struct nvme_tcp_ctrl *tcp_ctrl = container_of(work, | |
2314 | struct nvme_tcp_ctrl, err_work); | |
2315 | struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; | |
2316 | ||
2317 | nvme_stop_keep_alive(ctrl); | |
ff9fc7eb | 2318 | flush_work(&ctrl->async_event_work); |
3f2304f8 SG |
2319 | nvme_tcp_teardown_io_queues(ctrl, false); |
2320 | /* unquiesce to fail fast pending requests */ | |
9f27bd70 | 2321 | nvme_unquiesce_io_queues(ctrl); |
3f2304f8 | 2322 | nvme_tcp_teardown_admin_queue(ctrl, false); |
9f27bd70 | 2323 | nvme_unquiesce_admin_queue(ctrl); |
1f1a4f89 | 2324 | nvme_auth_stop(ctrl); |
3f2304f8 SG |
2325 | |
2326 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { | |
ecca390e | 2327 | /* state change failure is ok if we started ctrl delete */ |
e6e7f7ac KB |
2328 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2329 | ||
2330 | WARN_ON_ONCE(state != NVME_CTRL_DELETING && | |
2331 | state != NVME_CTRL_DELETING_NOIO); | |
3f2304f8 SG |
2332 | return; |
2333 | } | |
2334 | ||
adfde7ed | 2335 | nvme_tcp_reconnect_or_remove(ctrl, 0); |
3f2304f8 SG |
2336 | } |
2337 | ||
2338 | static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) | |
2339 | { | |
2340 | nvme_tcp_teardown_io_queues(ctrl, shutdown); | |
9f27bd70 | 2341 | nvme_quiesce_admin_queue(ctrl); |
285b6e9b | 2342 | nvme_disable_ctrl(ctrl, shutdown); |
3f2304f8 SG |
2343 | nvme_tcp_teardown_admin_queue(ctrl, shutdown); |
2344 | } | |
2345 | ||
2346 | static void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl) | |
2347 | { | |
2348 | nvme_tcp_teardown_ctrl(ctrl, true); | |
2349 | } | |
2350 | ||
2351 | static void nvme_reset_ctrl_work(struct work_struct *work) | |
2352 | { | |
2353 | struct nvme_ctrl *ctrl = | |
2354 | container_of(work, struct nvme_ctrl, reset_work); | |
adfde7ed | 2355 | int ret; |
3f2304f8 SG |
2356 | |
2357 | nvme_stop_ctrl(ctrl); | |
2358 | nvme_tcp_teardown_ctrl(ctrl, false); | |
2359 | ||
2360 | if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { | |
ecca390e | 2361 | /* state change failure is ok if we started ctrl delete */ |
e6e7f7ac KB |
2362 | enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); |
2363 | ||
2364 | WARN_ON_ONCE(state != NVME_CTRL_DELETING && | |
2365 | state != NVME_CTRL_DELETING_NOIO); | |
3f2304f8 SG |
2366 | return; |
2367 | } | |
2368 | ||
adfde7ed HR |
2369 | ret = nvme_tcp_setup_ctrl(ctrl, false); |
2370 | if (ret) | |
3f2304f8 SG |
2371 | goto out_fail; |
2372 | ||
2373 | return; | |
2374 | ||
2375 | out_fail: | |
2376 | ++ctrl->nr_reconnects; | |
adfde7ed | 2377 | nvme_tcp_reconnect_or_remove(ctrl, ret); |
3f2304f8 SG |
2378 | } |
2379 | ||
f7f70f4a RL |
2380 | static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) |
2381 | { | |
c4abd875 | 2382 | flush_work(&to_tcp_ctrl(ctrl)->err_work); |
f7f70f4a RL |
2383 | cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); |
2384 | } | |
2385 | ||
3f2304f8 SG |
2386 | static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) |
2387 | { | |
2388 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); | |
2389 | ||
2390 | if (list_empty(&ctrl->list)) | |
2391 | goto free_ctrl; | |
2392 | ||
2393 | mutex_lock(&nvme_tcp_ctrl_mutex); | |
2394 | list_del(&ctrl->list); | |
2395 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2396 | ||
2397 | nvmf_free_options(nctrl->opts); | |
2398 | free_ctrl: | |
2399 | kfree(ctrl->queues); | |
2400 | kfree(ctrl); | |
2401 | } | |
2402 | ||
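/*
 * SGL descriptor helpers: NVMe/TCP uses transport SGLs. Data is either
 * absent (null), carried in-capsule starting at icdoff (inline), or
 * transferred via separate data PDUs (host data).
 */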
2403 | static void nvme_tcp_set_sg_null(struct nvme_command *c) | |
2404 | { | |
2405 | struct nvme_sgl_desc *sg = &c->common.dptr.sgl; | |
2406 | ||
2407 | sg->addr = 0; | |
2408 | sg->length = 0; | |
2409 | sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | | |
2410 | NVME_SGL_FMT_TRANSPORT_A; | |
2411 | } | |
2412 | ||
2413 | static void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue, | |
2414 | struct nvme_command *c, u32 data_len) | |
2415 | { | |
2416 | struct nvme_sgl_desc *sg = &c->common.dptr.sgl; | |
2417 | ||
2418 | sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); | |
2419 | sg->length = cpu_to_le32(data_len); | |
2420 | sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; | |
2421 | } | |
2422 | ||
2423 | static void nvme_tcp_set_sg_host_data(struct nvme_command *c, | |
2424 | u32 data_len) | |
2425 | { | |
2426 | struct nvme_sgl_desc *sg = &c->common.dptr.sgl; | |
2427 | ||
2428 | sg->addr = 0; | |
2429 | sg->length = cpu_to_le32(data_len); | |
2430 | sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | | |
2431 | NVME_SGL_FMT_TRANSPORT_A; | |
2432 | } | |
2433 | ||
2434 | static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) | |
2435 | { | |
2436 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg); | |
2437 | struct nvme_tcp_queue *queue = &ctrl->queues[0]; | |
2438 | struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu; | |
2439 | struct nvme_command *cmd = &pdu->cmd; | |
2440 | u8 hdgst = nvme_tcp_hdgst_len(queue); | |
2441 | ||
2442 | memset(pdu, 0, sizeof(*pdu)); | |
2443 | pdu->hdr.type = nvme_tcp_cmd; | |
2444 | if (queue->hdr_digest) | |
2445 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
2446 | pdu->hdr.hlen = sizeof(*pdu); | |
2447 | pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); | |
2448 | ||
2449 | cmd->common.opcode = nvme_admin_async_event; | |
2450 | cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH; | |
2451 | cmd->common.flags |= NVME_CMD_SGL_METABUF; | |
2452 | nvme_tcp_set_sg_null(cmd); | |
2453 | ||
2454 | ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU; | |
2455 | ctrl->async_req.offset = 0; | |
2456 | ctrl->async_req.curr_bio = NULL; | |
2457 | ctrl->async_req.data_len = 0; | |
2458 | ||
86f0348a | 2459 | nvme_tcp_queue_request(&ctrl->async_req, true, true); |
3f2304f8 SG |
2460 | } |
2461 | ||
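/*
 * Force-complete a timed-out request, stopping the owning queue first
 * so that no concurrent completion path can race with us.
 */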
236187c4 SG |
2462 | static void nvme_tcp_complete_timed_out(struct request *rq) |
2463 | { | |
2464 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
2465 | struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; | |
2466 | ||
236187c4 | 2467 | nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); |
93ba75c9 | 2468 | nvmf_complete_timed_out_request(rq); |
236187c4 SG |
2469 | } |
2470 | ||
9bdb4833 | 2471 | static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) |
3f2304f8 SG |
2472 | { |
2473 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
236187c4 | 2474 | struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; |
a3406352 | 2475 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
7d23e836 | 2476 | struct nvme_command *cmd = &pdu->cmd; |
99607843 | 2477 | int qid = nvme_tcp_queue_id(req->queue); |
3f2304f8 | 2478 | |
236187c4 | 2479 | dev_warn(ctrl->device, |
45c36f04 | 2480 | "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n", |
7d23e836 CS |
2481 | rq->tag, nvme_cid(rq), pdu->hdr.type, cmd->common.opcode, |
2482 | nvme_fabrics_opcode_str(qid, cmd), qid); | |
3f2304f8 | 2483 | |
e6e7f7ac | 2484 | if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { |
39d57757 | 2485 | /* |
236187c4 SG |
2486 | * If we are resetting, connecting or deleting, we should | |
2487 | * complete immediately because we may block the controller | |
2488 | * teardown or setup sequence | |
2489 | * - ctrl disable/shutdown fabrics requests | |
2490 | * - connect requests | |
2491 | * - initialization admin requests | |
2492 | * - I/O requests that entered after unquiescing and | |
2493 | * the controller stopped responding | |
2494 | * | |
2495 | * All other requests should be cancelled by the error | |
2496 | * recovery work, so it's fine that we fail it here. | |
39d57757 | 2497 | */ |
236187c4 | 2498 | nvme_tcp_complete_timed_out(rq); |
3f2304f8 SG |
2499 | return BLK_EH_DONE; |
2500 | } | |
2501 | ||
236187c4 SG |
2502 | /* |
2503 | * LIVE state should trigger the normal error recovery which will | |
2504 | * handle completing this request. | |
2505 | */ | |
2506 | nvme_tcp_error_recovery(ctrl); | |
3f2304f8 SG |
2507 | return BLK_EH_RESET_TIMER; |
2508 | } | |
2509 | ||
2510 | static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, | |
2511 | struct request *rq) | |
2512 | { | |
2513 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
a3406352 | 2514 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
3f2304f8 SG |
2515 | struct nvme_command *c = &pdu->cmd; |
2516 | ||
2517 | c->common.flags |= NVME_CMD_SGL_METABUF; | |
2518 | ||
25e5cb78 SG |
2519 | if (!blk_rq_nr_phys_segments(rq)) |
2520 | nvme_tcp_set_sg_null(c); | |
2521 | else if (rq_data_dir(rq) == WRITE && | |
53ee9e29 | 2522 | req->data_len <= nvme_tcp_inline_data_size(req)) |
3f2304f8 SG |
2523 | nvme_tcp_set_sg_inline(queue, c, req->data_len); |
2524 | else | |
2525 | nvme_tcp_set_sg_host_data(c, req->data_len); | |
2526 | ||
2527 | return 0; | |
2528 | } | |
2529 | ||
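/*
 * Build the command PDU header. Data goes in-capsule (inline) only for
 * writes that fit within the inline data size; pdo marks where such
 * data begins and plen accounts for the optional header and data
 * digests.
 */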
2530 | static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, | |
2531 | struct request *rq) | |
2532 | { | |
2533 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
a3406352 | 2534 | struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); |
3f2304f8 SG |
2535 | struct nvme_tcp_queue *queue = req->queue; |
2536 | u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; | |
2537 | blk_status_t ret; | |
2538 | ||
f4b9e6c9 | 2539 | ret = nvme_setup_cmd(ns, rq); |
3f2304f8 SG |
2540 | if (ret) |
2541 | return ret; | |
2542 | ||
2543 | req->state = NVME_TCP_SEND_CMD_PDU; | |
1ba2e507 | 2544 | req->status = cpu_to_le16(NVME_SC_SUCCESS); |
3f2304f8 SG |
2545 | req->offset = 0; |
2546 | req->data_sent = 0; | |
2547 | req->pdu_len = 0; | |
2548 | req->pdu_sent = 0; | |
c2700d28 | 2549 | req->h2cdata_left = 0; |
25e5cb78 SG |
2550 | req->data_len = blk_rq_nr_phys_segments(rq) ? |
2551 | blk_rq_payload_bytes(rq) : 0; | |
3f2304f8 | 2552 | req->curr_bio = rq->bio; |
e11e5116 | 2553 | if (req->curr_bio && req->data_len) |
cb9b870f | 2554 | nvme_tcp_init_iter(req, rq_data_dir(rq)); |
3f2304f8 SG |
2555 | |
2556 | if (rq_data_dir(rq) == WRITE && | |
53ee9e29 | 2557 | req->data_len <= nvme_tcp_inline_data_size(req)) |
3f2304f8 | 2558 | req->pdu_len = req->data_len; |
3f2304f8 SG |
2559 | |
2560 | pdu->hdr.type = nvme_tcp_cmd; | |
2561 | pdu->hdr.flags = 0; | |
2562 | if (queue->hdr_digest) | |
2563 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
2564 | if (queue->data_digest && req->pdu_len) { | |
2565 | pdu->hdr.flags |= NVME_TCP_F_DDGST; | |
2566 | ddgst = nvme_tcp_ddgst_len(queue); | |
2567 | } | |
2568 | pdu->hdr.hlen = sizeof(*pdu); | |
2569 | pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0; | |
2570 | pdu->hdr.plen = | |
2571 | cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst); | |
2572 | ||
2573 | ret = nvme_tcp_map_data(queue, rq); | |
2574 | if (unlikely(ret)) { | |
28a4cac4 | 2575 | nvme_cleanup_cmd(rq); |
3f2304f8 SG |
2576 | dev_err(queue->ctrl->ctrl.device, |
2577 | "Failed to map data (%d)\n", ret); | |
2578 | return ret; | |
2579 | } | |
2580 | ||
2581 | return 0; | |
2582 | } | |
2583 | ||
86f0348a SG |
2584 | static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx) |
2585 | { | |
2586 | struct nvme_tcp_queue *queue = hctx->driver_data; | |
2587 | ||
2588 | if (!llist_empty(&queue->req_list)) | |
2589 | queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); | |
2590 | } | |
2591 | ||
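/*
 * blk-mq queue_rq hook: build the command PDU and queue the request for
 * transmission; bd->last defers kicking io_work until the last request
 * of a batch (see nvme_tcp_commit_rqs above).
 */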
3f2304f8 SG |
2592 | static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, |
2593 | const struct blk_mq_queue_data *bd) | |
2594 | { | |
2595 | struct nvme_ns *ns = hctx->queue->queuedata; | |
2596 | struct nvme_tcp_queue *queue = hctx->driver_data; | |
2597 | struct request *rq = bd->rq; | |
2598 | struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); | |
2599 | bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags); | |
2600 | blk_status_t ret; | |
2601 | ||
a9715744 TC |
2602 | if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) |
2603 | return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); | |
3f2304f8 SG |
2604 | |
2605 | ret = nvme_tcp_setup_cmd_pdu(ns, rq); | |
2606 | if (unlikely(ret)) | |
2607 | return ret; | |
2608 | ||
6887fc64 | 2609 | nvme_start_request(rq); |
3f2304f8 | 2610 | |
86f0348a | 2611 | nvme_tcp_queue_request(req, true, bd->last); |
3f2304f8 SG |
2612 | |
2613 | return BLK_STS_OK; | |
2614 | } | |
2615 | ||
a4e1d0b7 | 2616 | static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) |
873946f4 | 2617 | { |
06427ca0 | 2618 | struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); |
a249d306 KB |
2619 | |
2620 | nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues); | |
873946f4 SG |
2621 | } |
2622 | ||
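/*
 * blk-mq poll hook: busy-poll the socket while its receive queue is
 * empty, then drain whatever arrived and report the number of
 * completions observed.
 */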
5a72e899 | 2623 | static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) |
1a9460ce SG |
2624 | { |
2625 | struct nvme_tcp_queue *queue = hctx->driver_data; | |
2626 | struct sock *sk = queue->sock->sk; | |
2627 | ||
f86e5bf8 SG |
2628 | if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) |
2629 | return 0; | |
2630 | ||
72e5d757 | 2631 | set_bit(NVME_TCP_Q_POLLING, &queue->flags); |
3f926af3 | 2632 | if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) |
1a9460ce SG |
2633 | sk_busy_loop(sk, true); |
2634 | nvme_tcp_try_recv(queue); | |
72e5d757 | 2635 | clear_bit(NVME_TCP_Q_POLLING, &queue->flags); |
1a9460ce SG |
2636 | return queue->nr_cqe; |
2637 | } | |
2638 | ||
02c57a82 MB |
2639 | static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) |
2640 | { | |
2641 | struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0]; | |
2642 | struct sockaddr_storage src_addr; | |
2643 | int ret, len; | |
2644 | ||
2645 | len = nvmf_get_address(ctrl, buf, size); | |
2646 | ||
76d54bf2 AM |
2647 | mutex_lock(&queue->queue_lock); |
2648 | ||
2649 | if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) | |
2650 | goto done; | |
02c57a82 MB |
2651 | ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); |
2652 | if (ret > 0) { | |
2653 | if (len > 0) | |
2654 | len--; /* strip trailing newline */ | |
2655 | len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", | |
2656 | (len) ? "," : "", &src_addr); | |
2657 | } | |
76d54bf2 AM |
2658 | done: |
2659 | mutex_unlock(&queue->queue_lock); | |
02c57a82 MB |
2660 | |
2661 | return len; | |
2662 | } | |
2663 | ||
6acbd961 | 2664 | static const struct blk_mq_ops nvme_tcp_mq_ops = { |
3f2304f8 | 2665 | .queue_rq = nvme_tcp_queue_rq, |
86f0348a | 2666 | .commit_rqs = nvme_tcp_commit_rqs, |
3f2304f8 SG |
2667 | .complete = nvme_complete_rq, |
2668 | .init_request = nvme_tcp_init_request, | |
2669 | .exit_request = nvme_tcp_exit_request, | |
2670 | .init_hctx = nvme_tcp_init_hctx, | |
2671 | .timeout = nvme_tcp_timeout, | |
873946f4 | 2672 | .map_queues = nvme_tcp_map_queues, |
1a9460ce | 2673 | .poll = nvme_tcp_poll, |
3f2304f8 SG |
2674 | }; |
2675 | ||
6acbd961 | 2676 | static const struct blk_mq_ops nvme_tcp_admin_mq_ops = { |
3f2304f8 SG |
2677 | .queue_rq = nvme_tcp_queue_rq, |
2678 | .complete = nvme_complete_rq, | |
2679 | .init_request = nvme_tcp_init_request, | |
2680 | .exit_request = nvme_tcp_exit_request, | |
2681 | .init_hctx = nvme_tcp_init_admin_hctx, | |
2682 | .timeout = nvme_tcp_timeout, | |
2683 | }; | |
2684 | ||
2685 | static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { | |
2686 | .name = "tcp", | |
2687 | .module = THIS_MODULE, | |
db45e1a5 | 2688 | .flags = NVME_F_FABRICS | NVME_F_BLOCKING, |
3f2304f8 SG |
2689 | .reg_read32 = nvmf_reg_read32, |
2690 | .reg_read64 = nvmf_reg_read64, | |
2691 | .reg_write32 = nvmf_reg_write32, | |
210b1f65 | 2692 | .subsystem_reset = nvmf_subsystem_reset, |
3f2304f8 SG |
2693 | .free_ctrl = nvme_tcp_free_ctrl, |
2694 | .submit_async_event = nvme_tcp_submit_async_event, | |
2695 | .delete_ctrl = nvme_tcp_delete_ctrl, | |
02c57a82 | 2696 | .get_address = nvme_tcp_get_address, |
f7f70f4a | 2697 | .stop_ctrl = nvme_tcp_stop_ctrl, |
3f2304f8 SG |
2698 | }; |
2699 | ||
2700 | static bool | |
2701 | nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) | |
2702 | { | |
2703 | struct nvme_tcp_ctrl *ctrl; | |
2704 | bool found = false; | |
2705 | ||
2706 | mutex_lock(&nvme_tcp_ctrl_mutex); | |
2707 | list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) { | |
2708 | found = nvmf_ip_options_match(&ctrl->ctrl, opts); | |
2709 | if (found) | |
2710 | break; | |
2711 | } | |
2712 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2713 | ||
2714 | return found; | |
2715 | } | |
2716 | ||
10fd7fb6 | 2717 | static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, |
3f2304f8 SG |
2718 | struct nvmf_ctrl_options *opts) |
2719 | { | |
2720 | struct nvme_tcp_ctrl *ctrl; | |
2721 | int ret; | |
2722 | ||
2723 | ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); | |
2724 | if (!ctrl) | |
2725 | return ERR_PTR(-ENOMEM); | |
2726 | ||
2727 | INIT_LIST_HEAD(&ctrl->list); | |
2728 | ctrl->ctrl.opts = opts; | |
1a9460ce SG |
2729 | ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + |
2730 | opts->nr_poll_queues + 1; | |
3f2304f8 SG |
2731 | ctrl->ctrl.sqsize = opts->queue_size - 1; |
2732 | ctrl->ctrl.kato = opts->kato; | |
2733 | ||
2734 | INIT_DELAYED_WORK(&ctrl->connect_work, | |
2735 | nvme_tcp_reconnect_ctrl_work); | |
2736 | INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); | |
2737 | INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); | |
2738 | ||
2739 | if (!(opts->mask & NVMF_OPT_TRSVCID)) { | |
2740 | opts->trsvcid = | |
2741 | kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL); | |
2742 | if (!opts->trsvcid) { | |
2743 | ret = -ENOMEM; | |
2744 | goto out_free_ctrl; | |
2745 | } | |
2746 | opts->mask |= NVMF_OPT_TRSVCID; | |
2747 | } | |
2748 | ||
2749 | ret = inet_pton_with_scope(&init_net, AF_UNSPEC, | |
2750 | opts->traddr, opts->trsvcid, &ctrl->addr); | |
2751 | if (ret) { | |
2752 | pr_err("malformed address passed: %s:%s\n", | |
2753 | opts->traddr, opts->trsvcid); | |
2754 | goto out_free_ctrl; | |
2755 | } | |
2756 | ||
2757 | if (opts->mask & NVMF_OPT_HOST_TRADDR) { | |
2758 | ret = inet_pton_with_scope(&init_net, AF_UNSPEC, | |
2759 | opts->host_traddr, NULL, &ctrl->src_addr); | |
2760 | if (ret) { | |
2761 | pr_err("malformed src address passed: %s\n", | |
2762 | opts->host_traddr); | |
2763 | goto out_free_ctrl; | |
2764 | } | |
2765 | } | |
2766 | ||
3ede8f72 | 2767 | if (opts->mask & NVMF_OPT_HOST_IFACE) { |
8b43ced6 | 2768 | if (!__dev_get_by_name(&init_net, opts->host_iface)) { |
3ede8f72 MB |
2769 | pr_err("invalid interface passed: %s\n", |
2770 | opts->host_iface); | |
2771 | ret = -ENODEV; | |
2772 | goto out_free_ctrl; | |
2773 | } | |
2774 | } | |
2775 | ||
3f2304f8 SG |
2776 | if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) { |
2777 | ret = -EALREADY; | |
2778 | goto out_free_ctrl; | |
2779 | } | |
2780 | ||
873946f4 | 2781 | ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), |
3f2304f8 SG |
2782 | GFP_KERNEL); |
2783 | if (!ctrl->queues) { | |
2784 | ret = -ENOMEM; | |
2785 | goto out_free_ctrl; | |
2786 | } | |
2787 | ||
2788 | ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0); | |
2789 | if (ret) | |
2790 | goto out_kfree_queues; | |
2791 | ||
10fd7fb6 KB |
2792 | return ctrl; |
2793 | out_kfree_queues: | |
2794 | kfree(ctrl->queues); | |
2795 | out_free_ctrl: | |
2796 | kfree(ctrl); | |
2797 | return ERR_PTR(ret); | |
2798 | } | |
2799 | ||
2800 | static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, | |
2801 | struct nvmf_ctrl_options *opts) | |
2802 | { | |
2803 | struct nvme_tcp_ctrl *ctrl; | |
2804 | int ret; | |
2805 | ||
2806 | ctrl = nvme_tcp_alloc_ctrl(dev, opts); | |
2807 | if (IS_ERR(ctrl)) | |
2808 | return ERR_CAST(ctrl); | |
2809 | ||
1a9e2181 KB |
2810 | ret = nvme_add_ctrl(&ctrl->ctrl); |
2811 | if (ret) | |
2812 | goto out_put_ctrl; | |
2813 | ||
3f2304f8 SG |
2814 | if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { |
2815 | WARN_ON_ONCE(1); | |
2816 | ret = -EINTR; | |
2817 | goto out_uninit_ctrl; | |
2818 | } | |
2819 | ||
2820 | ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true); | |
2821 | if (ret) | |
2822 | goto out_uninit_ctrl; | |
2823 | ||
524719b4 NY |
2824 | dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp, hostnqn: %s\n", |
2825 | nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr, opts->host->nqn); | |
3f2304f8 | 2826 | |
3f2304f8 SG |
2827 | mutex_lock(&nvme_tcp_ctrl_mutex); |
2828 | list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); | |
2829 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2830 | ||
2831 | return &ctrl->ctrl; | |
2832 | ||
2833 | out_uninit_ctrl: | |
2834 | nvme_uninit_ctrl(&ctrl->ctrl); | |
1a9e2181 | 2835 | out_put_ctrl: |
3f2304f8 SG |
2836 | nvme_put_ctrl(&ctrl->ctrl); |
2837 | if (ret > 0) | |
2838 | ret = -EIO; | |
2839 | return ERR_PTR(ret); | |
3f2304f8 SG |
2840 | } |
2841 | ||
2842 | static struct nvmf_transport_ops nvme_tcp_transport = { | |
2843 | .name = "tcp", | |
2844 | .module = THIS_MODULE, | |
2845 | .required_opts = NVMF_OPT_TRADDR, | |
2846 | .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | | |
2847 | NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | | |
873946f4 | 2848 | NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | |
bb13985d | 2849 | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | |
adf22c52 HR |
2850 | NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | |
2851 | NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY, | |
3f2304f8 SG |
2852 | .create_ctrl = nvme_tcp_create_ctrl, |
2853 | }; | |
2854 | ||
2855 | static int __init nvme_tcp_init_module(void) | |
2856 | { | |
0c29f9fa LF |
2857 | unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS; |
2858 | ||
7e87965d SG |
2859 | BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); |
2860 | BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); | |
2861 | BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24); | |
2862 | BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24); | |
2863 | BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24); | |
2864 | BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128); | |
2865 | BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128); | |
2866 | BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); | |
2867 | ||
0c29f9fa LF |
2868 | if (wq_unbound) |
2869 | wq_flags |= WQ_UNBOUND; | |
2870 | ||
2871 | nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0); | |
3f2304f8 SG |
2872 | if (!nvme_tcp_wq) |
2873 | return -ENOMEM; | |
2874 | ||
2875 | nvmf_register_transport(&nvme_tcp_transport); | |
2876 | return 0; | |
2877 | } | |
2878 | ||
2879 | static void __exit nvme_tcp_cleanup_module(void) | |
2880 | { | |
2881 | struct nvme_tcp_ctrl *ctrl; | |
2882 | ||
2883 | nvmf_unregister_transport(&nvme_tcp_transport); | |
2884 | ||
2885 | mutex_lock(&nvme_tcp_ctrl_mutex); | |
2886 | list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) | |
2887 | nvme_delete_ctrl(&ctrl->ctrl); | |
2888 | mutex_unlock(&nvme_tcp_ctrl_mutex); | |
2889 | flush_workqueue(nvme_delete_wq); | |
2890 | ||
2891 | destroy_workqueue(nvme_tcp_wq); | |
2892 | } | |
2893 | ||
2894 | module_init(nvme_tcp_init_module); | |
2895 | module_exit(nvme_tcp_cleanup_module); | |
2896 | ||
92b0b0ff | 2897 | MODULE_DESCRIPTION("NVMe host TCP transport driver"); |
3f2304f8 | 2898 | MODULE_LICENSE("GPL v2"); |