Commit | Line | Data |
---|---|---|
872d26a3 SG |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * NVMe over Fabrics TCP target. | |
4 | * Copyright (c) 2018 Lightbits Labs. All rights reserved. | |
5 | */ | |
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
7 | #include <linux/module.h> | |
8 | #include <linux/init.h> | |
9 | #include <linux/slab.h> | |
10 | #include <linux/err.h> | |
11 | #include <linux/nvme-tcp.h> | |
12 | #include <net/sock.h> | |
13 | #include <net/tcp.h> | |
14 | #include <linux/inet.h> | |
15 | #include <linux/llist.h> | |
16 | #include <crypto/hash.h> | |
17 | ||
18 | #include "nvmet.h" | |
19 | ||
/* NOTE(review): presumably the default inline data size for a port - confirm. */
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)

/*
 * Work budgets. NOTE(review): the users of these limits are outside this
 * part of the file; presumably they bound one io_work iteration - confirm.
 */
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
25 | ||
/*
 * Send-side state of a command (nvmet_tcp_cmd.state). It selects which
 * nvmet_try_send_*() helper nvmet_tcp_try_send_one() runs next.
 */
enum nvmet_tcp_send_state {
	NVMET_TCP_SEND_DATA_PDU,	/* C2H data PDU header */
	NVMET_TCP_SEND_DATA,		/* payload pages of the scatterlist */
	NVMET_TCP_SEND_R2T,		/* ready-to-transfer PDU */
	NVMET_TCP_SEND_DDGST,		/* data digest trailer */
	NVMET_TCP_SEND_RESPONSE		/* response PDU */
};
33 | ||
/* Receive-side state of a queue (nvmet_tcp_queue.rcv_state). */
enum nvmet_tcp_recv_state {
	NVMET_TCP_RECV_PDU,	/* receiving a PDU header */
	NVMET_TCP_RECV_DATA,	/* receiving data into a command's iovec */
	NVMET_TCP_RECV_DDGST,	/* receiving the data digest */
	NVMET_TCP_RECV_ERR,	/* set by nvmet_tcp_fatal_error() */
};
40 | ||
/* Bits for nvmet_tcp_cmd.flags. */
enum {
	/* set by nvmet_tcp_handle_req_failure() after nvmet_req_init() failed */
	NVMET_TCP_F_INIT_FAILED = (1 << 0),
};
44 | ||
/*
 * Per-command transport context: the generic nvmet request plus the PDU
 * buffers and the send/receive progress of one NVMe/TCP command.
 */
struct nvmet_tcp_cmd {
	struct nvmet_tcp_queue *queue;	/* owning queue */
	struct nvmet_req req;

	/* PDU buffers used for this command */
	struct nvme_tcp_cmd_pdu *cmd_pdu;
	struct nvme_tcp_rsp_pdu *rsp_pdu;
	struct nvme_tcp_data_pdu *data_pdu;
	struct nvme_tcp_r2t_pdu *r2t_pdu;

	u32 rbytes_done;	/* host data received so far; next expected offset */
	u32 wbytes_done;	/* data bytes sent so far by nvmet_try_send_data() */

	u32 pdu_len;		/* data length of the PDU being received */
	u32 pdu_recv;		/* NOTE(review): presumably bytes received of it - confirm */
	int sg_idx;		/* first sg entry mapped by nvmet_tcp_map_pdu_iovec() */
	int nr_mapped;		/* number of entries mapped from sg_idx */
	struct msghdr recv_msg;	/* its iterator walks @iov */
	struct kvec *iov;	/* allocated in nvmet_tcp_map_data() */
	u32 flags;		/* NVMET_TCP_F_* */

	struct list_head entry;		/* on queue->free_list or ->resp_send_list */
	struct llist_node lentry;	/* on queue->resp_list */

	/* send state */
	u32 offset;		/* bytes already sent of the current piece */
	struct scatterlist *cur_sg;
	enum nvmet_tcp_send_state state;

	__le32 exp_ddgst;	/* digest written by nvmet_tcp_ddgst() */
	__le32 recv_ddgst;	/* NOTE(review): presumably digest read from the wire - confirm */
};
76 | ||
/* Connection state of a queue (nvmet_tcp_queue.state). */
enum nvmet_tcp_queue_state {
	NVMET_TCP_Q_CONNECTING,		/* only an ICReq PDU is accepted */
	NVMET_TCP_Q_LIVE,		/* set once the ICResp has been sent */
	NVMET_TCP_Q_DISCONNECTING,
};
82 | ||
/* One NVMe/TCP queue: a socket plus its send, receive and digest state. */
struct nvmet_tcp_queue {
	struct socket *sock;
	struct nvmet_tcp_port *port;
	struct work_struct io_work;	/* queued on nvmet_tcp_wq, pinned to @cpu */
	int cpu;
	struct nvmet_cq nvme_cq;
	struct nvmet_sq nvme_sq;

	/* send state */
	struct nvmet_tcp_cmd *cmds;	/* command array; the index is the R2T ttag */
	unsigned int nr_cmds;
	struct list_head free_list;	/* idle commands */
	struct llist_head resp_list;	/* filled by nvmet_tcp_queue_response() */
	struct list_head resp_send_list;	/* drained from resp_list, awaiting send */
	int send_list_len;		/* entries on resp_send_list */
	struct nvmet_tcp_cmd *snd_cmd;	/* command currently being transmitted */

	/* recv state */
	int offset;			/* bytes received of the current header/digest */
	int left;			/* bytes still expected */
	enum nvmet_tcp_recv_state rcv_state;
	struct nvmet_tcp_cmd *cmd;	/* command data is being received for */
	union nvme_tcp_pdu pdu;		/* receive buffer for the PDU header */

	/* digest state */
	bool hdr_digest;		/* negotiated in nvmet_tcp_handle_icreq() */
	bool data_digest;
	struct ahash_request *snd_hash;
	struct ahash_request *rcv_hash;

	spinlock_t state_lock;
	enum nvmet_tcp_queue_state state;

	struct sockaddr_storage sockaddr;
	struct sockaddr_storage sockaddr_peer;
	struct work_struct release_work;

	int idx;			/* queue number used in log messages */
	struct list_head queue_list;

	struct nvmet_tcp_cmd connect;	/* never returned to free_list */

	struct page_frag_cache pf_cache;

	/* NOTE(review): presumably the socket's original callbacks - confirm */
	void (*data_ready)(struct sock *);
	void (*state_change)(struct sock *);
	void (*write_space)(struct sock *);
};
131 | ||
/* Transport-private state of one nvmet port. */
struct nvmet_tcp_port {
	struct socket *sock;
	struct work_struct accept_work;
	struct nvmet_port *nport;
	struct sockaddr_storage addr;
	int last_cpu;
	/* NOTE(review): presumably the socket's original data_ready - confirm */
	void (*data_ready)(struct sock *);
};
140 | ||
/* Module-wide queue bookkeeping. */
static DEFINE_IDA(nvmet_tcp_queue_ida);
static LIST_HEAD(nvmet_tcp_queue_list);
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);

/* Workqueue that runs each queue's io_work. */
static struct workqueue_struct *nvmet_tcp_wq;
static struct nvmet_fabrics_ops nvmet_tcp_ops;
/* Forward declarations; the definitions come later in the file. */
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
149 | ||
150 | static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, | |
151 | struct nvmet_tcp_cmd *cmd) | |
152 | { | |
153 | return cmd - queue->cmds; | |
154 | } | |
155 | ||
156 | static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd) | |
157 | { | |
158 | return nvme_is_write(cmd->req.cmd) && | |
159 | cmd->rbytes_done < cmd->req.transfer_len; | |
160 | } | |
161 | ||
162 | static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd) | |
163 | { | |
fc6c9730 | 164 | return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status; |
872d26a3 SG |
165 | } |
166 | ||
167 | static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd) | |
168 | { | |
169 | return !nvme_is_write(cmd->req.cmd) && | |
170 | cmd->req.transfer_len > 0 && | |
fc6c9730 | 171 | !cmd->req.cqe->status; |
872d26a3 SG |
172 | } |
173 | ||
174 | static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) | |
175 | { | |
176 | return nvme_is_write(cmd->req.cmd) && cmd->pdu_len && | |
177 | !cmd->rbytes_done; | |
178 | } | |
179 | ||
180 | static inline struct nvmet_tcp_cmd * | |
181 | nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue) | |
182 | { | |
183 | struct nvmet_tcp_cmd *cmd; | |
184 | ||
185 | cmd = list_first_entry_or_null(&queue->free_list, | |
186 | struct nvmet_tcp_cmd, entry); | |
187 | if (!cmd) | |
188 | return NULL; | |
189 | list_del_init(&cmd->entry); | |
190 | ||
191 | cmd->rbytes_done = cmd->wbytes_done = 0; | |
192 | cmd->pdu_len = 0; | |
193 | cmd->pdu_recv = 0; | |
194 | cmd->iov = NULL; | |
195 | cmd->flags = 0; | |
196 | return cmd; | |
197 | } | |
198 | ||
199 | static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd) | |
200 | { | |
201 | if (unlikely(cmd == &cmd->queue->connect)) | |
202 | return; | |
203 | ||
204 | list_add_tail(&cmd->entry, &cmd->queue->free_list); | |
205 | } | |
206 | ||
207 | static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue) | |
208 | { | |
209 | return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; | |
210 | } | |
211 | ||
212 | static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue) | |
213 | { | |
214 | return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; | |
215 | } | |
216 | ||
217 | static inline void nvmet_tcp_hdgst(struct ahash_request *hash, | |
218 | void *pdu, size_t len) | |
219 | { | |
220 | struct scatterlist sg; | |
221 | ||
222 | sg_init_one(&sg, pdu, len); | |
223 | ahash_request_set_crypt(hash, &sg, pdu + len, len); | |
224 | crypto_ahash_digest(hash); | |
225 | } | |
226 | ||
227 | static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, | |
228 | void *pdu, size_t len) | |
229 | { | |
230 | struct nvme_tcp_hdr *hdr = pdu; | |
231 | __le32 recv_digest; | |
232 | __le32 exp_digest; | |
233 | ||
234 | if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) { | |
235 | pr_err("queue %d: header digest enabled but no header digest\n", | |
236 | queue->idx); | |
237 | return -EPROTO; | |
238 | } | |
239 | ||
240 | recv_digest = *(__le32 *)(pdu + hdr->hlen); | |
241 | nvmet_tcp_hdgst(queue->rcv_hash, pdu, len); | |
242 | exp_digest = *(__le32 *)(pdu + hdr->hlen); | |
243 | if (recv_digest != exp_digest) { | |
244 | pr_err("queue %d: header digest error: recv %#x expected %#x\n", | |
245 | queue->idx, le32_to_cpu(recv_digest), | |
246 | le32_to_cpu(exp_digest)); | |
247 | return -EPROTO; | |
248 | } | |
249 | ||
250 | return 0; | |
251 | } | |
252 | ||
253 | static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) | |
254 | { | |
255 | struct nvme_tcp_hdr *hdr = pdu; | |
256 | u8 digest_len = nvmet_tcp_hdgst_len(queue); | |
257 | u32 len; | |
258 | ||
259 | len = le32_to_cpu(hdr->plen) - hdr->hlen - | |
260 | (hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0); | |
261 | ||
262 | if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) { | |
263 | pr_err("queue %d: data digest flag is cleared\n", queue->idx); | |
264 | return -EPROTO; | |
265 | } | |
266 | ||
267 | return 0; | |
268 | } | |
269 | ||
270 | static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd) | |
271 | { | |
272 | struct scatterlist *sg; | |
273 | int i; | |
274 | ||
275 | sg = &cmd->req.sg[cmd->sg_idx]; | |
276 | ||
277 | for (i = 0; i < cmd->nr_mapped; i++) | |
278 | kunmap(sg_page(&sg[i])); | |
279 | } | |
280 | ||
281 | static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd) | |
282 | { | |
283 | struct kvec *iov = cmd->iov; | |
284 | struct scatterlist *sg; | |
285 | u32 length, offset, sg_offset; | |
286 | ||
287 | length = cmd->pdu_len; | |
288 | cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE); | |
289 | offset = cmd->rbytes_done; | |
290 | cmd->sg_idx = DIV_ROUND_UP(offset, PAGE_SIZE); | |
291 | sg_offset = offset % PAGE_SIZE; | |
292 | sg = &cmd->req.sg[cmd->sg_idx]; | |
293 | ||
294 | while (length) { | |
295 | u32 iov_len = min_t(u32, length, sg->length - sg_offset); | |
296 | ||
297 | iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset; | |
298 | iov->iov_len = iov_len; | |
299 | ||
300 | length -= iov_len; | |
301 | sg = sg_next(sg); | |
302 | iov++; | |
303 | } | |
304 | ||
305 | iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov, | |
306 | cmd->nr_mapped, cmd->pdu_len); | |
307 | } | |
308 | ||
309 | static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) | |
310 | { | |
311 | queue->rcv_state = NVMET_TCP_RECV_ERR; | |
312 | if (queue->nvme_sq.ctrl) | |
313 | nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl); | |
314 | else | |
315 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
316 | } | |
317 | ||
318 | static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) | |
319 | { | |
320 | struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; | |
321 | u32 len = le32_to_cpu(sgl->length); | |
322 | ||
323 | if (!cmd->req.data_len) | |
324 | return 0; | |
325 | ||
326 | if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) | | |
327 | NVME_SGL_FMT_OFFSET)) { | |
328 | if (!nvme_is_write(cmd->req.cmd)) | |
329 | return NVME_SC_INVALID_FIELD | NVME_SC_DNR; | |
330 | ||
331 | if (len > cmd->req.port->inline_data_size) | |
332 | return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR; | |
333 | cmd->pdu_len = len; | |
334 | } | |
335 | cmd->req.transfer_len += len; | |
336 | ||
337 | cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt); | |
338 | if (!cmd->req.sg) | |
339 | return NVME_SC_INTERNAL; | |
340 | cmd->cur_sg = cmd->req.sg; | |
341 | ||
342 | if (nvmet_tcp_has_data_in(cmd)) { | |
343 | cmd->iov = kmalloc_array(cmd->req.sg_cnt, | |
344 | sizeof(*cmd->iov), GFP_KERNEL); | |
345 | if (!cmd->iov) | |
346 | goto err; | |
347 | } | |
348 | ||
349 | return 0; | |
350 | err: | |
351 | sgl_free(cmd->req.sg); | |
352 | return NVME_SC_INTERNAL; | |
353 | } | |
354 | ||
355 | static void nvmet_tcp_ddgst(struct ahash_request *hash, | |
356 | struct nvmet_tcp_cmd *cmd) | |
357 | { | |
358 | ahash_request_set_crypt(hash, cmd->req.sg, | |
359 | (void *)&cmd->exp_ddgst, cmd->req.transfer_len); | |
360 | crypto_ahash_digest(hash); | |
361 | } | |
362 | ||
363 | static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) | |
364 | { | |
365 | struct nvme_tcp_data_pdu *pdu = cmd->data_pdu; | |
366 | struct nvmet_tcp_queue *queue = cmd->queue; | |
367 | u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); | |
368 | u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue); | |
369 | ||
370 | cmd->offset = 0; | |
371 | cmd->state = NVMET_TCP_SEND_DATA_PDU; | |
372 | ||
373 | pdu->hdr.type = nvme_tcp_c2h_data; | |
70583295 SG |
374 | pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ? |
375 | NVME_TCP_F_DATA_SUCCESS : 0); | |
872d26a3 SG |
376 | pdu->hdr.hlen = sizeof(*pdu); |
377 | pdu->hdr.pdo = pdu->hdr.hlen + hdgst; | |
378 | pdu->hdr.plen = | |
379 | cpu_to_le32(pdu->hdr.hlen + hdgst + | |
380 | cmd->req.transfer_len + ddgst); | |
fc6c9730 | 381 | pdu->command_id = cmd->req.cqe->command_id; |
872d26a3 SG |
382 | pdu->data_length = cpu_to_le32(cmd->req.transfer_len); |
383 | pdu->data_offset = cpu_to_le32(cmd->wbytes_done); | |
384 | ||
385 | if (queue->data_digest) { | |
386 | pdu->hdr.flags |= NVME_TCP_F_DDGST; | |
387 | nvmet_tcp_ddgst(queue->snd_hash, cmd); | |
388 | } | |
389 | ||
390 | if (cmd->queue->hdr_digest) { | |
391 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
392 | nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); | |
393 | } | |
394 | } | |
395 | ||
396 | static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) | |
397 | { | |
398 | struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu; | |
399 | struct nvmet_tcp_queue *queue = cmd->queue; | |
400 | u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); | |
401 | ||
402 | cmd->offset = 0; | |
403 | cmd->state = NVMET_TCP_SEND_R2T; | |
404 | ||
405 | pdu->hdr.type = nvme_tcp_r2t; | |
406 | pdu->hdr.flags = 0; | |
407 | pdu->hdr.hlen = sizeof(*pdu); | |
408 | pdu->hdr.pdo = 0; | |
409 | pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); | |
410 | ||
411 | pdu->command_id = cmd->req.cmd->common.command_id; | |
412 | pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd); | |
413 | pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done); | |
414 | pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done); | |
415 | if (cmd->queue->hdr_digest) { | |
416 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
417 | nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); | |
418 | } | |
419 | } | |
420 | ||
421 | static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) | |
422 | { | |
423 | struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu; | |
424 | struct nvmet_tcp_queue *queue = cmd->queue; | |
425 | u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); | |
426 | ||
427 | cmd->offset = 0; | |
428 | cmd->state = NVMET_TCP_SEND_RESPONSE; | |
429 | ||
430 | pdu->hdr.type = nvme_tcp_rsp; | |
431 | pdu->hdr.flags = 0; | |
432 | pdu->hdr.hlen = sizeof(*pdu); | |
433 | pdu->hdr.pdo = 0; | |
434 | pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); | |
435 | if (cmd->queue->hdr_digest) { | |
436 | pdu->hdr.flags |= NVME_TCP_F_HDGST; | |
437 | nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); | |
438 | } | |
439 | } | |
440 | ||
441 | static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue) | |
442 | { | |
443 | struct llist_node *node; | |
444 | ||
445 | node = llist_del_all(&queue->resp_list); | |
446 | if (!node) | |
447 | return; | |
448 | ||
449 | while (node) { | |
450 | struct nvmet_tcp_cmd *cmd = llist_entry(node, | |
451 | struct nvmet_tcp_cmd, lentry); | |
452 | ||
453 | list_add(&cmd->entry, &queue->resp_send_list); | |
454 | node = node->next; | |
455 | queue->send_list_len++; | |
456 | } | |
457 | } | |
458 | ||
459 | static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue) | |
460 | { | |
461 | queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list, | |
462 | struct nvmet_tcp_cmd, entry); | |
463 | if (!queue->snd_cmd) { | |
464 | nvmet_tcp_process_resp_list(queue); | |
465 | queue->snd_cmd = | |
466 | list_first_entry_or_null(&queue->resp_send_list, | |
467 | struct nvmet_tcp_cmd, entry); | |
468 | if (unlikely(!queue->snd_cmd)) | |
469 | return NULL; | |
470 | } | |
471 | ||
472 | list_del_init(&queue->snd_cmd->entry); | |
473 | queue->send_list_len--; | |
474 | ||
475 | if (nvmet_tcp_need_data_out(queue->snd_cmd)) | |
476 | nvmet_setup_c2h_data_pdu(queue->snd_cmd); | |
477 | else if (nvmet_tcp_need_data_in(queue->snd_cmd)) | |
478 | nvmet_setup_r2t_pdu(queue->snd_cmd); | |
479 | else | |
480 | nvmet_setup_response_pdu(queue->snd_cmd); | |
481 | ||
482 | return queue->snd_cmd; | |
483 | } | |
484 | ||
485 | static void nvmet_tcp_queue_response(struct nvmet_req *req) | |
486 | { | |
487 | struct nvmet_tcp_cmd *cmd = | |
488 | container_of(req, struct nvmet_tcp_cmd, req); | |
489 | struct nvmet_tcp_queue *queue = cmd->queue; | |
490 | ||
491 | llist_add(&cmd->lentry, &queue->resp_list); | |
492 | queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work); | |
493 | } | |
494 | ||
495 | static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) | |
496 | { | |
497 | u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); | |
498 | int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst; | |
499 | int ret; | |
500 | ||
501 | ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), | |
502 | offset_in_page(cmd->data_pdu) + cmd->offset, | |
503 | left, MSG_DONTWAIT | MSG_MORE); | |
504 | if (ret <= 0) | |
505 | return ret; | |
506 | ||
507 | cmd->offset += ret; | |
508 | left -= ret; | |
509 | ||
510 | if (left) | |
511 | return -EAGAIN; | |
512 | ||
513 | cmd->state = NVMET_TCP_SEND_DATA; | |
514 | cmd->offset = 0; | |
515 | return 1; | |
516 | } | |
517 | ||
518 | static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) | |
519 | { | |
520 | struct nvmet_tcp_queue *queue = cmd->queue; | |
521 | int ret; | |
522 | ||
523 | while (cmd->cur_sg) { | |
524 | struct page *page = sg_page(cmd->cur_sg); | |
525 | u32 left = cmd->cur_sg->length - cmd->offset; | |
526 | ||
527 | ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, | |
528 | left, MSG_DONTWAIT | MSG_MORE); | |
529 | if (ret <= 0) | |
530 | return ret; | |
531 | ||
532 | cmd->offset += ret; | |
533 | cmd->wbytes_done += ret; | |
534 | ||
535 | /* Done with sg?*/ | |
536 | if (cmd->offset == cmd->cur_sg->length) { | |
537 | cmd->cur_sg = sg_next(cmd->cur_sg); | |
538 | cmd->offset = 0; | |
539 | } | |
540 | } | |
541 | ||
542 | if (queue->data_digest) { | |
543 | cmd->state = NVMET_TCP_SEND_DDGST; | |
544 | cmd->offset = 0; | |
545 | } else { | |
70583295 SG |
546 | if (queue->nvme_sq.sqhd_disabled) { |
547 | cmd->queue->snd_cmd = NULL; | |
548 | nvmet_tcp_put_cmd(cmd); | |
549 | } else { | |
550 | nvmet_setup_response_pdu(cmd); | |
551 | } | |
872d26a3 | 552 | } |
70583295 SG |
553 | |
554 | if (queue->nvme_sq.sqhd_disabled) { | |
555 | kfree(cmd->iov); | |
556 | sgl_free(cmd->req.sg); | |
557 | } | |
558 | ||
872d26a3 SG |
559 | return 1; |
560 | ||
561 | } | |
562 | ||
563 | static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, | |
564 | bool last_in_batch) | |
565 | { | |
566 | u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); | |
567 | int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst; | |
568 | int flags = MSG_DONTWAIT; | |
569 | int ret; | |
570 | ||
571 | if (!last_in_batch && cmd->queue->send_list_len) | |
572 | flags |= MSG_MORE; | |
573 | else | |
574 | flags |= MSG_EOR; | |
575 | ||
576 | ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu), | |
577 | offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags); | |
578 | if (ret <= 0) | |
579 | return ret; | |
580 | cmd->offset += ret; | |
581 | left -= ret; | |
582 | ||
583 | if (left) | |
584 | return -EAGAIN; | |
585 | ||
586 | kfree(cmd->iov); | |
587 | sgl_free(cmd->req.sg); | |
588 | cmd->queue->snd_cmd = NULL; | |
589 | nvmet_tcp_put_cmd(cmd); | |
590 | return 1; | |
591 | } | |
592 | ||
593 | static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) | |
594 | { | |
595 | u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); | |
596 | int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst; | |
597 | int flags = MSG_DONTWAIT; | |
598 | int ret; | |
599 | ||
600 | if (!last_in_batch && cmd->queue->send_list_len) | |
601 | flags |= MSG_MORE; | |
602 | else | |
603 | flags |= MSG_EOR; | |
604 | ||
605 | ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu), | |
606 | offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags); | |
607 | if (ret <= 0) | |
608 | return ret; | |
609 | cmd->offset += ret; | |
610 | left -= ret; | |
611 | ||
612 | if (left) | |
613 | return -EAGAIN; | |
614 | ||
615 | cmd->queue->snd_cmd = NULL; | |
616 | return 1; | |
617 | } | |
618 | ||
619 | static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) | |
620 | { | |
621 | struct nvmet_tcp_queue *queue = cmd->queue; | |
622 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; | |
623 | struct kvec iov = { | |
624 | .iov_base = &cmd->exp_ddgst + cmd->offset, | |
625 | .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset | |
626 | }; | |
627 | int ret; | |
628 | ||
629 | ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); | |
630 | if (unlikely(ret <= 0)) | |
631 | return ret; | |
632 | ||
633 | cmd->offset += ret; | |
70583295 SG |
634 | |
635 | if (queue->nvme_sq.sqhd_disabled) { | |
636 | cmd->queue->snd_cmd = NULL; | |
637 | nvmet_tcp_put_cmd(cmd); | |
638 | } else { | |
639 | nvmet_setup_response_pdu(cmd); | |
640 | } | |
872d26a3 SG |
641 | return 1; |
642 | } | |
643 | ||
644 | static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, | |
645 | bool last_in_batch) | |
646 | { | |
647 | struct nvmet_tcp_cmd *cmd = queue->snd_cmd; | |
648 | int ret = 0; | |
649 | ||
650 | if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) { | |
651 | cmd = nvmet_tcp_fetch_cmd(queue); | |
652 | if (unlikely(!cmd)) | |
653 | return 0; | |
654 | } | |
655 | ||
656 | if (cmd->state == NVMET_TCP_SEND_DATA_PDU) { | |
657 | ret = nvmet_try_send_data_pdu(cmd); | |
658 | if (ret <= 0) | |
659 | goto done_send; | |
660 | } | |
661 | ||
662 | if (cmd->state == NVMET_TCP_SEND_DATA) { | |
663 | ret = nvmet_try_send_data(cmd); | |
664 | if (ret <= 0) | |
665 | goto done_send; | |
666 | } | |
667 | ||
668 | if (cmd->state == NVMET_TCP_SEND_DDGST) { | |
669 | ret = nvmet_try_send_ddgst(cmd); | |
670 | if (ret <= 0) | |
671 | goto done_send; | |
672 | } | |
673 | ||
674 | if (cmd->state == NVMET_TCP_SEND_R2T) { | |
675 | ret = nvmet_try_send_r2t(cmd, last_in_batch); | |
676 | if (ret <= 0) | |
677 | goto done_send; | |
678 | } | |
679 | ||
680 | if (cmd->state == NVMET_TCP_SEND_RESPONSE) | |
681 | ret = nvmet_try_send_response(cmd, last_in_batch); | |
682 | ||
683 | done_send: | |
684 | if (ret < 0) { | |
685 | if (ret == -EAGAIN) | |
686 | return 0; | |
687 | return ret; | |
688 | } | |
689 | ||
690 | return 1; | |
691 | } | |
692 | ||
/*
 * Call nvmet_tcp_try_send_one() up to @budget times, stopping at the first
 * call that reports no progress or an error. *@sends is incremented once
 * per successful call. Returns the result of the last call (0 if @budget
 * is not positive).
 */
static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
		int budget, int *sends)
{
	int ret = 0;
	int n = 0;

	while (n < budget) {
		ret = nvmet_tcp_try_send_one(queue, n == budget - 1);
		if (ret <= 0)
			break;
		(*sends)++;
		n++;
	}

	return ret;
}
707 | ||
708 | static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) | |
709 | { | |
710 | queue->offset = 0; | |
711 | queue->left = sizeof(struct nvme_tcp_hdr); | |
712 | queue->cmd = NULL; | |
713 | queue->rcv_state = NVMET_TCP_RECV_PDU; | |
714 | } | |
715 | ||
716 | static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) | |
717 | { | |
718 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); | |
719 | ||
720 | ahash_request_free(queue->rcv_hash); | |
721 | ahash_request_free(queue->snd_hash); | |
722 | crypto_free_ahash(tfm); | |
723 | } | |
724 | ||
725 | static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue) | |
726 | { | |
727 | struct crypto_ahash *tfm; | |
728 | ||
729 | tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); | |
730 | if (IS_ERR(tfm)) | |
731 | return PTR_ERR(tfm); | |
732 | ||
733 | queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); | |
734 | if (!queue->snd_hash) | |
735 | goto free_tfm; | |
736 | ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); | |
737 | ||
738 | queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); | |
739 | if (!queue->rcv_hash) | |
740 | goto free_snd_hash; | |
741 | ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); | |
742 | ||
743 | return 0; | |
744 | free_snd_hash: | |
745 | ahash_request_free(queue->snd_hash); | |
746 | free_tfm: | |
747 | crypto_free_ahash(tfm); | |
748 | return -ENOMEM; | |
749 | } | |
750 | ||
751 | ||
752 | static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) | |
753 | { | |
754 | struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq; | |
755 | struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp; | |
756 | struct msghdr msg = {}; | |
757 | struct kvec iov; | |
758 | int ret; | |
759 | ||
760 | if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) { | |
761 | pr_err("bad nvme-tcp pdu length (%d)\n", | |
762 | le32_to_cpu(icreq->hdr.plen)); | |
763 | nvmet_tcp_fatal_error(queue); | |
764 | } | |
765 | ||
766 | if (icreq->pfv != NVME_TCP_PFV_1_0) { | |
767 | pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv); | |
768 | return -EPROTO; | |
769 | } | |
770 | ||
771 | if (icreq->hpda != 0) { | |
772 | pr_err("queue %d: unsupported hpda %d\n", queue->idx, | |
773 | icreq->hpda); | |
774 | return -EPROTO; | |
775 | } | |
776 | ||
872d26a3 SG |
777 | queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); |
778 | queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); | |
779 | if (queue->hdr_digest || queue->data_digest) { | |
780 | ret = nvmet_tcp_alloc_crypto(queue); | |
781 | if (ret) | |
782 | return ret; | |
783 | } | |
784 | ||
785 | memset(icresp, 0, sizeof(*icresp)); | |
786 | icresp->hdr.type = nvme_tcp_icresp; | |
787 | icresp->hdr.hlen = sizeof(*icresp); | |
788 | icresp->hdr.pdo = 0; | |
789 | icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); | |
790 | icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); | |
f4d10b5c | 791 | icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ |
872d26a3 SG |
792 | icresp->cpda = 0; |
793 | if (queue->hdr_digest) | |
794 | icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; | |
795 | if (queue->data_digest) | |
796 | icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE; | |
797 | ||
798 | iov.iov_base = icresp; | |
799 | iov.iov_len = sizeof(*icresp); | |
800 | ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); | |
801 | if (ret < 0) | |
802 | goto free_crypto; | |
803 | ||
804 | queue->state = NVMET_TCP_Q_LIVE; | |
805 | nvmet_prepare_receive_pdu(queue); | |
806 | return 0; | |
807 | free_crypto: | |
808 | if (queue->hdr_digest || queue->data_digest) | |
809 | nvmet_tcp_free_crypto(queue); | |
810 | return ret; | |
811 | } | |
812 | ||
813 | static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, | |
814 | struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) | |
815 | { | |
816 | int ret; | |
817 | ||
818 | /* recover the expected data transfer length */ | |
819 | req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); | |
820 | ||
821 | if (!nvme_is_write(cmd->req.cmd) || | |
822 | req->data_len > cmd->req.port->inline_data_size) { | |
823 | nvmet_prepare_receive_pdu(queue); | |
824 | return; | |
825 | } | |
826 | ||
827 | ret = nvmet_tcp_map_data(cmd); | |
828 | if (unlikely(ret)) { | |
829 | pr_err("queue %d: failed to map data\n", queue->idx); | |
830 | nvmet_tcp_fatal_error(queue); | |
831 | return; | |
832 | } | |
833 | ||
834 | queue->rcv_state = NVMET_TCP_RECV_DATA; | |
835 | nvmet_tcp_map_pdu_iovec(cmd); | |
836 | cmd->flags |= NVMET_TCP_F_INIT_FAILED; | |
837 | } | |
838 | ||
839 | static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) | |
840 | { | |
841 | struct nvme_tcp_data_pdu *data = &queue->pdu.data; | |
842 | struct nvmet_tcp_cmd *cmd; | |
843 | ||
844 | cmd = &queue->cmds[data->ttag]; | |
845 | ||
846 | if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { | |
847 | pr_err("ttag %u unexpected data offset %u (expected %u)\n", | |
848 | data->ttag, le32_to_cpu(data->data_offset), | |
849 | cmd->rbytes_done); | |
850 | /* FIXME: use path and transport errors */ | |
851 | nvmet_req_complete(&cmd->req, | |
852 | NVME_SC_INVALID_FIELD | NVME_SC_DNR); | |
853 | return -EPROTO; | |
854 | } | |
855 | ||
856 | cmd->pdu_len = le32_to_cpu(data->data_length); | |
857 | cmd->pdu_recv = 0; | |
858 | nvmet_tcp_map_pdu_iovec(cmd); | |
859 | queue->cmd = cmd; | |
860 | queue->rcv_state = NVMET_TCP_RECV_DATA; | |
861 | ||
862 | return 0; | |
863 | } | |
864 | ||
865 | static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) | |
866 | { | |
867 | struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; | |
868 | struct nvme_command *nvme_cmd = &queue->pdu.cmd.cmd; | |
869 | struct nvmet_req *req; | |
870 | int ret; | |
871 | ||
872 | if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) { | |
873 | if (hdr->type != nvme_tcp_icreq) { | |
874 | pr_err("unexpected pdu type (%d) before icreq\n", | |
875 | hdr->type); | |
876 | nvmet_tcp_fatal_error(queue); | |
877 | return -EPROTO; | |
878 | } | |
879 | return nvmet_tcp_handle_icreq(queue); | |
880 | } | |
881 | ||
882 | if (hdr->type == nvme_tcp_h2c_data) { | |
883 | ret = nvmet_tcp_handle_h2c_data_pdu(queue); | |
884 | if (unlikely(ret)) | |
885 | return ret; | |
886 | return 0; | |
887 | } | |
888 | ||
889 | queue->cmd = nvmet_tcp_get_cmd(queue); | |
890 | if (unlikely(!queue->cmd)) { | |
891 | /* This should never happen */ | |
892 | pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d", | |
893 | queue->idx, queue->nr_cmds, queue->send_list_len, | |
894 | nvme_cmd->common.opcode); | |
895 | nvmet_tcp_fatal_error(queue); | |
896 | return -ENOMEM; | |
897 | } | |
898 | ||
899 | req = &queue->cmd->req; | |
900 | memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd)); | |
901 | ||
902 | if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, | |
903 | &queue->nvme_sq, &nvmet_tcp_ops))) { | |
904 | pr_err("failed cmd %p id %d opcode %d, data_len: %d\n", | |
905 | req->cmd, req->cmd->common.command_id, | |
906 | req->cmd->common.opcode, | |
907 | le32_to_cpu(req->cmd->common.dptr.sgl.length)); | |
908 | ||
909 | nvmet_tcp_handle_req_failure(queue, queue->cmd, req); | |
910 | return -EAGAIN; | |
911 | } | |
912 | ||
913 | ret = nvmet_tcp_map_data(queue->cmd); | |
914 | if (unlikely(ret)) { | |
915 | pr_err("queue %d: failed to map data\n", queue->idx); | |
916 | if (nvmet_tcp_has_inline_data(queue->cmd)) | |
917 | nvmet_tcp_fatal_error(queue); | |
918 | else | |
919 | nvmet_req_complete(req, ret); | |
920 | ret = -EAGAIN; | |
921 | goto out; | |
922 | } | |
923 | ||
924 | if (nvmet_tcp_need_data_in(queue->cmd)) { | |
925 | if (nvmet_tcp_has_inline_data(queue->cmd)) { | |
926 | queue->rcv_state = NVMET_TCP_RECV_DATA; | |
927 | nvmet_tcp_map_pdu_iovec(queue->cmd); | |
928 | return 0; | |
929 | } | |
930 | /* send back R2T */ | |
931 | nvmet_tcp_queue_response(&queue->cmd->req); | |
932 | goto out; | |
933 | } | |
934 | ||
935 | nvmet_req_execute(&queue->cmd->req); | |
936 | out: | |
937 | nvmet_prepare_receive_pdu(queue); | |
938 | return ret; | |
939 | } | |
940 | ||
/*
 * Expected header length of each PDU type the target accepts, indexed by
 * PDU type. Types without an initializer are left at 0.
 */
static const u8 nvme_tcp_pdu_sizes[] = {
	[nvme_tcp_icreq] = sizeof(struct nvme_tcp_icreq_pdu),
	[nvme_tcp_cmd] = sizeof(struct nvme_tcp_cmd_pdu),
	[nvme_tcp_h2c_data] = sizeof(struct nvme_tcp_data_pdu),
};
946 | ||
947 | static inline u8 nvmet_tcp_pdu_size(u8 type) | |
948 | { | |
949 | size_t idx = type; | |
950 | ||
951 | return (idx < ARRAY_SIZE(nvme_tcp_pdu_sizes) && | |
952 | nvme_tcp_pdu_sizes[idx]) ? | |
953 | nvme_tcp_pdu_sizes[idx] : 0; | |
954 | } | |
955 | ||
956 | static inline bool nvmet_tcp_pdu_valid(u8 type) | |
957 | { | |
958 | switch (type) { | |
959 | case nvme_tcp_icreq: | |
960 | case nvme_tcp_cmd: | |
961 | case nvme_tcp_h2c_data: | |
962 | /* fallthru */ | |
963 | return true; | |
964 | } | |
965 | ||
966 | return false; | |
967 | } | |
968 | ||
969 | static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) | |
970 | { | |
971 | struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr; | |
972 | int len; | |
973 | struct kvec iov; | |
974 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; | |
975 | ||
976 | recv: | |
977 | iov.iov_base = (void *)&queue->pdu + queue->offset; | |
978 | iov.iov_len = queue->left; | |
979 | len = kernel_recvmsg(queue->sock, &msg, &iov, 1, | |
980 | iov.iov_len, msg.msg_flags); | |
981 | if (unlikely(len < 0)) | |
982 | return len; | |
983 | ||
984 | queue->offset += len; | |
985 | queue->left -= len; | |
986 | if (queue->left) | |
987 | return -EAGAIN; | |
988 | ||
989 | if (queue->offset == sizeof(struct nvme_tcp_hdr)) { | |
990 | u8 hdgst = nvmet_tcp_hdgst_len(queue); | |
991 | ||
992 | if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) { | |
993 | pr_err("unexpected pdu type %d\n", hdr->type); | |
994 | nvmet_tcp_fatal_error(queue); | |
995 | return -EIO; | |
996 | } | |
997 | ||
998 | if (unlikely(hdr->hlen != nvmet_tcp_pdu_size(hdr->type))) { | |
999 | pr_err("pdu %d bad hlen %d\n", hdr->type, hdr->hlen); | |
1000 | return -EIO; | |
1001 | } | |
1002 | ||
1003 | queue->left = hdr->hlen - queue->offset + hdgst; | |
1004 | goto recv; | |
1005 | } | |
1006 | ||
1007 | if (queue->hdr_digest && | |
1008 | nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { | |
1009 | nvmet_tcp_fatal_error(queue); /* fatal */ | |
1010 | return -EPROTO; | |
1011 | } | |
1012 | ||
1013 | if (queue->data_digest && | |
1014 | nvmet_tcp_check_ddgst(queue, &queue->pdu)) { | |
1015 | nvmet_tcp_fatal_error(queue); /* fatal */ | |
1016 | return -EPROTO; | |
1017 | } | |
1018 | ||
1019 | return nvmet_tcp_done_recv_pdu(queue); | |
1020 | } | |
1021 | ||
1022 | static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) | |
1023 | { | |
1024 | struct nvmet_tcp_queue *queue = cmd->queue; | |
1025 | ||
1026 | nvmet_tcp_ddgst(queue->rcv_hash, cmd); | |
1027 | queue->offset = 0; | |
1028 | queue->left = NVME_TCP_DIGEST_LENGTH; | |
1029 | queue->rcv_state = NVMET_TCP_RECV_DDGST; | |
1030 | } | |
1031 | ||
1032 | static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) | |
1033 | { | |
1034 | struct nvmet_tcp_cmd *cmd = queue->cmd; | |
1035 | int ret; | |
1036 | ||
1037 | while (msg_data_left(&cmd->recv_msg)) { | |
1038 | ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg, | |
1039 | cmd->recv_msg.msg_flags); | |
1040 | if (ret <= 0) | |
1041 | return ret; | |
1042 | ||
1043 | cmd->pdu_recv += ret; | |
1044 | cmd->rbytes_done += ret; | |
1045 | } | |
1046 | ||
1047 | nvmet_tcp_unmap_pdu_iovec(cmd); | |
1048 | ||
1049 | if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && | |
1050 | cmd->rbytes_done == cmd->req.transfer_len) { | |
1051 | if (queue->data_digest) { | |
1052 | nvmet_tcp_prep_recv_ddgst(cmd); | |
1053 | return 0; | |
1054 | } | |
1055 | nvmet_req_execute(&cmd->req); | |
1056 | } | |
1057 | ||
1058 | nvmet_prepare_receive_pdu(queue); | |
1059 | return 0; | |
1060 | } | |
1061 | ||
1062 | static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) | |
1063 | { | |
1064 | struct nvmet_tcp_cmd *cmd = queue->cmd; | |
1065 | int ret; | |
1066 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; | |
1067 | struct kvec iov = { | |
1068 | .iov_base = (void *)&cmd->recv_ddgst + queue->offset, | |
1069 | .iov_len = queue->left | |
1070 | }; | |
1071 | ||
1072 | ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, | |
1073 | iov.iov_len, msg.msg_flags); | |
1074 | if (unlikely(ret < 0)) | |
1075 | return ret; | |
1076 | ||
1077 | queue->offset += ret; | |
1078 | queue->left -= ret; | |
1079 | if (queue->left) | |
1080 | return -EAGAIN; | |
1081 | ||
1082 | if (queue->data_digest && cmd->exp_ddgst != cmd->recv_ddgst) { | |
1083 | pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n", | |
1084 | queue->idx, cmd->req.cmd->common.command_id, | |
1085 | queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), | |
1086 | le32_to_cpu(cmd->exp_ddgst)); | |
1087 | nvmet_tcp_finish_cmd(cmd); | |
1088 | nvmet_tcp_fatal_error(queue); | |
1089 | ret = -EPROTO; | |
1090 | goto out; | |
1091 | } | |
1092 | ||
1093 | if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && | |
1094 | cmd->rbytes_done == cmd->req.transfer_len) | |
1095 | nvmet_req_execute(&cmd->req); | |
1096 | ret = 0; | |
1097 | out: | |
1098 | nvmet_prepare_receive_pdu(queue); | |
1099 | return ret; | |
1100 | } | |
1101 | ||
1102 | static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue) | |
1103 | { | |
fb865858 | 1104 | int result = 0; |
872d26a3 SG |
1105 | |
1106 | if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR)) | |
1107 | return 0; | |
1108 | ||
1109 | if (queue->rcv_state == NVMET_TCP_RECV_PDU) { | |
1110 | result = nvmet_tcp_try_recv_pdu(queue); | |
1111 | if (result != 0) | |
1112 | goto done_recv; | |
1113 | } | |
1114 | ||
1115 | if (queue->rcv_state == NVMET_TCP_RECV_DATA) { | |
1116 | result = nvmet_tcp_try_recv_data(queue); | |
1117 | if (result != 0) | |
1118 | goto done_recv; | |
1119 | } | |
1120 | ||
1121 | if (queue->rcv_state == NVMET_TCP_RECV_DDGST) { | |
1122 | result = nvmet_tcp_try_recv_ddgst(queue); | |
1123 | if (result != 0) | |
1124 | goto done_recv; | |
1125 | } | |
1126 | ||
1127 | done_recv: | |
1128 | if (result < 0) { | |
1129 | if (result == -EAGAIN) | |
1130 | return 0; | |
1131 | return result; | |
1132 | } | |
1133 | return 1; | |
1134 | } | |
1135 | ||
/*
 * Run up to @budget receive steps, incrementing *@recvs once per step that
 * made progress.
 *
 * Returns the result of the last nvmet_tcp_try_recv_one() call, or 0 when
 * no step was attempted.
 */
static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
		int budget, int *recvs)
{
	int remaining = budget;
	int ret = 0;

	while (remaining-- > 0) {
		ret = nvmet_tcp_try_recv_one(queue);
		if (ret <= 0)
			break;
		(*recvs)++;
	}

	return ret;
}
1150 | ||
1151 | static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) | |
1152 | { | |
1153 | spin_lock(&queue->state_lock); | |
1154 | if (queue->state != NVMET_TCP_Q_DISCONNECTING) { | |
1155 | queue->state = NVMET_TCP_Q_DISCONNECTING; | |
1156 | schedule_work(&queue->release_work); | |
1157 | } | |
1158 | spin_unlock(&queue->state_lock); | |
1159 | } | |
1160 | ||
1161 | static void nvmet_tcp_io_work(struct work_struct *w) | |
1162 | { | |
1163 | struct nvmet_tcp_queue *queue = | |
1164 | container_of(w, struct nvmet_tcp_queue, io_work); | |
1165 | bool pending; | |
1166 | int ret, ops = 0; | |
1167 | ||
1168 | do { | |
1169 | pending = false; | |
1170 | ||
1171 | ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); | |
1172 | if (ret > 0) { | |
1173 | pending = true; | |
1174 | } else if (ret < 0) { | |
1175 | if (ret == -EPIPE || ret == -ECONNRESET) | |
1176 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
1177 | else | |
1178 | nvmet_tcp_fatal_error(queue); | |
1179 | return; | |
1180 | } | |
1181 | ||
1182 | ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); | |
1183 | if (ret > 0) { | |
1184 | /* transmitted message/data */ | |
1185 | pending = true; | |
1186 | } else if (ret < 0) { | |
1187 | if (ret == -EPIPE || ret == -ECONNRESET) | |
1188 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
1189 | else | |
1190 | nvmet_tcp_fatal_error(queue); | |
1191 | return; | |
1192 | } | |
1193 | ||
1194 | } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); | |
1195 | ||
1196 | /* | |
1197 | * We exahusted our budget, requeue our selves | |
1198 | */ | |
1199 | if (pending) | |
1200 | queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); | |
1201 | } | |
1202 | ||
1203 | static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, | |
1204 | struct nvmet_tcp_cmd *c) | |
1205 | { | |
1206 | u8 hdgst = nvmet_tcp_hdgst_len(queue); | |
1207 | ||
1208 | c->queue = queue; | |
1209 | c->req.port = queue->port->nport; | |
1210 | ||
1211 | c->cmd_pdu = page_frag_alloc(&queue->pf_cache, | |
1212 | sizeof(*c->cmd_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); | |
1213 | if (!c->cmd_pdu) | |
1214 | return -ENOMEM; | |
1215 | c->req.cmd = &c->cmd_pdu->cmd; | |
1216 | ||
1217 | c->rsp_pdu = page_frag_alloc(&queue->pf_cache, | |
1218 | sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); | |
1219 | if (!c->rsp_pdu) | |
1220 | goto out_free_cmd; | |
fc6c9730 | 1221 | c->req.cqe = &c->rsp_pdu->cqe; |
872d26a3 SG |
1222 | |
1223 | c->data_pdu = page_frag_alloc(&queue->pf_cache, | |
1224 | sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); | |
1225 | if (!c->data_pdu) | |
1226 | goto out_free_rsp; | |
1227 | ||
1228 | c->r2t_pdu = page_frag_alloc(&queue->pf_cache, | |
1229 | sizeof(*c->r2t_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); | |
1230 | if (!c->r2t_pdu) | |
1231 | goto out_free_data; | |
1232 | ||
1233 | c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; | |
1234 | ||
1235 | list_add_tail(&c->entry, &queue->free_list); | |
1236 | ||
1237 | return 0; | |
1238 | out_free_data: | |
1239 | page_frag_free(c->data_pdu); | |
1240 | out_free_rsp: | |
1241 | page_frag_free(c->rsp_pdu); | |
1242 | out_free_cmd: | |
1243 | page_frag_free(c->cmd_pdu); | |
1244 | return -ENOMEM; | |
1245 | } | |
1246 | ||
1247 | static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c) | |
1248 | { | |
1249 | page_frag_free(c->r2t_pdu); | |
1250 | page_frag_free(c->data_pdu); | |
1251 | page_frag_free(c->rsp_pdu); | |
1252 | page_frag_free(c->cmd_pdu); | |
1253 | } | |
1254 | ||
1255 | static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue) | |
1256 | { | |
1257 | struct nvmet_tcp_cmd *cmds; | |
1258 | int i, ret = -EINVAL, nr_cmds = queue->nr_cmds; | |
1259 | ||
1260 | cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL); | |
1261 | if (!cmds) | |
1262 | goto out; | |
1263 | ||
1264 | for (i = 0; i < nr_cmds; i++) { | |
1265 | ret = nvmet_tcp_alloc_cmd(queue, cmds + i); | |
1266 | if (ret) | |
1267 | goto out_free; | |
1268 | } | |
1269 | ||
1270 | queue->cmds = cmds; | |
1271 | ||
1272 | return 0; | |
1273 | out_free: | |
1274 | while (--i >= 0) | |
1275 | nvmet_tcp_free_cmd(cmds + i); | |
1276 | kfree(cmds); | |
1277 | out: | |
1278 | return ret; | |
1279 | } | |
1280 | ||
1281 | static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue) | |
1282 | { | |
1283 | struct nvmet_tcp_cmd *cmds = queue->cmds; | |
1284 | int i; | |
1285 | ||
1286 | for (i = 0; i < queue->nr_cmds; i++) | |
1287 | nvmet_tcp_free_cmd(cmds + i); | |
1288 | ||
1289 | nvmet_tcp_free_cmd(&queue->connect); | |
1290 | kfree(cmds); | |
1291 | } | |
1292 | ||
1293 | static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) | |
1294 | { | |
1295 | struct socket *sock = queue->sock; | |
1296 | ||
1297 | write_lock_bh(&sock->sk->sk_callback_lock); | |
1298 | sock->sk->sk_data_ready = queue->data_ready; | |
1299 | sock->sk->sk_state_change = queue->state_change; | |
1300 | sock->sk->sk_write_space = queue->write_space; | |
1301 | sock->sk->sk_user_data = NULL; | |
1302 | write_unlock_bh(&sock->sk->sk_callback_lock); | |
1303 | } | |
1304 | ||
1305 | static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) | |
1306 | { | |
1307 | nvmet_req_uninit(&cmd->req); | |
1308 | nvmet_tcp_unmap_pdu_iovec(cmd); | |
1309 | sgl_free(cmd->req.sg); | |
1310 | } | |
1311 | ||
1312 | static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) | |
1313 | { | |
1314 | struct nvmet_tcp_cmd *cmd = queue->cmds; | |
1315 | int i; | |
1316 | ||
1317 | for (i = 0; i < queue->nr_cmds; i++, cmd++) { | |
1318 | if (nvmet_tcp_need_data_in(cmd)) | |
1319 | nvmet_tcp_finish_cmd(cmd); | |
1320 | } | |
1321 | ||
1322 | if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { | |
1323 | /* failed in connect */ | |
1324 | nvmet_tcp_finish_cmd(&queue->connect); | |
1325 | } | |
1326 | } | |
1327 | ||
1328 | static void nvmet_tcp_release_queue_work(struct work_struct *w) | |
1329 | { | |
1330 | struct nvmet_tcp_queue *queue = | |
1331 | container_of(w, struct nvmet_tcp_queue, release_work); | |
1332 | ||
1333 | mutex_lock(&nvmet_tcp_queue_mutex); | |
1334 | list_del_init(&queue->queue_list); | |
1335 | mutex_unlock(&nvmet_tcp_queue_mutex); | |
1336 | ||
1337 | nvmet_tcp_restore_socket_callbacks(queue); | |
1338 | flush_work(&queue->io_work); | |
1339 | ||
1340 | nvmet_tcp_uninit_data_in_cmds(queue); | |
1341 | nvmet_sq_destroy(&queue->nvme_sq); | |
1342 | cancel_work_sync(&queue->io_work); | |
1343 | sock_release(queue->sock); | |
1344 | nvmet_tcp_free_cmds(queue); | |
1345 | if (queue->hdr_digest || queue->data_digest) | |
1346 | nvmet_tcp_free_crypto(queue); | |
1347 | ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); | |
1348 | ||
1349 | kfree(queue); | |
1350 | } | |
1351 | ||
1352 | static void nvmet_tcp_data_ready(struct sock *sk) | |
1353 | { | |
1354 | struct nvmet_tcp_queue *queue; | |
1355 | ||
1356 | read_lock_bh(&sk->sk_callback_lock); | |
1357 | queue = sk->sk_user_data; | |
1358 | if (likely(queue)) | |
1359 | queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); | |
1360 | read_unlock_bh(&sk->sk_callback_lock); | |
1361 | } | |
1362 | ||
1363 | static void nvmet_tcp_write_space(struct sock *sk) | |
1364 | { | |
1365 | struct nvmet_tcp_queue *queue; | |
1366 | ||
1367 | read_lock_bh(&sk->sk_callback_lock); | |
1368 | queue = sk->sk_user_data; | |
1369 | if (unlikely(!queue)) | |
1370 | goto out; | |
1371 | ||
1372 | if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) { | |
1373 | queue->write_space(sk); | |
1374 | goto out; | |
1375 | } | |
1376 | ||
1377 | if (sk_stream_is_writeable(sk)) { | |
1378 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | |
1379 | queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); | |
1380 | } | |
1381 | out: | |
1382 | read_unlock_bh(&sk->sk_callback_lock); | |
1383 | } | |
1384 | ||
1385 | static void nvmet_tcp_state_change(struct sock *sk) | |
1386 | { | |
1387 | struct nvmet_tcp_queue *queue; | |
1388 | ||
1389 | write_lock_bh(&sk->sk_callback_lock); | |
1390 | queue = sk->sk_user_data; | |
1391 | if (!queue) | |
1392 | goto done; | |
1393 | ||
1394 | switch (sk->sk_state) { | |
1395 | case TCP_FIN_WAIT1: | |
1396 | case TCP_CLOSE_WAIT: | |
1397 | case TCP_CLOSE: | |
1398 | /* FALLTHRU */ | |
1399 | sk->sk_user_data = NULL; | |
1400 | nvmet_tcp_schedule_release_queue(queue); | |
1401 | break; | |
1402 | default: | |
1403 | pr_warn("queue %d unhandled state %d\n", | |
1404 | queue->idx, sk->sk_state); | |
1405 | } | |
1406 | done: | |
1407 | write_unlock_bh(&sk->sk_callback_lock); | |
1408 | } | |
1409 | ||
1410 | static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) | |
1411 | { | |
1412 | struct socket *sock = queue->sock; | |
1413 | struct linger sol = { .l_onoff = 1, .l_linger = 0 }; | |
1414 | int ret; | |
1415 | ||
1416 | ret = kernel_getsockname(sock, | |
1417 | (struct sockaddr *)&queue->sockaddr); | |
1418 | if (ret < 0) | |
1419 | return ret; | |
1420 | ||
1421 | ret = kernel_getpeername(sock, | |
1422 | (struct sockaddr *)&queue->sockaddr_peer); | |
1423 | if (ret < 0) | |
1424 | return ret; | |
1425 | ||
1426 | /* | |
1427 | * Cleanup whatever is sitting in the TCP transmit queue on socket | |
1428 | * close. This is done to prevent stale data from being sent should | |
1429 | * the network connection be restored before TCP times out. | |
1430 | */ | |
1431 | ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, | |
1432 | (char *)&sol, sizeof(sol)); | |
1433 | if (ret) | |
1434 | return ret; | |
1435 | ||
1436 | write_lock_bh(&sock->sk->sk_callback_lock); | |
1437 | sock->sk->sk_user_data = queue; | |
1438 | queue->data_ready = sock->sk->sk_data_ready; | |
1439 | sock->sk->sk_data_ready = nvmet_tcp_data_ready; | |
1440 | queue->state_change = sock->sk->sk_state_change; | |
1441 | sock->sk->sk_state_change = nvmet_tcp_state_change; | |
1442 | queue->write_space = sock->sk->sk_write_space; | |
1443 | sock->sk->sk_write_space = nvmet_tcp_write_space; | |
1444 | write_unlock_bh(&sock->sk->sk_callback_lock); | |
1445 | ||
1446 | return 0; | |
1447 | } | |
1448 | ||
1449 | static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, | |
1450 | struct socket *newsock) | |
1451 | { | |
1452 | struct nvmet_tcp_queue *queue; | |
1453 | int ret; | |
1454 | ||
1455 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); | |
1456 | if (!queue) | |
1457 | return -ENOMEM; | |
1458 | ||
1459 | INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work); | |
1460 | INIT_WORK(&queue->io_work, nvmet_tcp_io_work); | |
1461 | queue->sock = newsock; | |
1462 | queue->port = port; | |
1463 | queue->nr_cmds = 0; | |
1464 | spin_lock_init(&queue->state_lock); | |
1465 | queue->state = NVMET_TCP_Q_CONNECTING; | |
1466 | INIT_LIST_HEAD(&queue->free_list); | |
1467 | init_llist_head(&queue->resp_list); | |
1468 | INIT_LIST_HEAD(&queue->resp_send_list); | |
1469 | ||
1470 | queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL); | |
1471 | if (queue->idx < 0) { | |
1472 | ret = queue->idx; | |
1473 | goto out_free_queue; | |
1474 | } | |
1475 | ||
1476 | ret = nvmet_tcp_alloc_cmd(queue, &queue->connect); | |
1477 | if (ret) | |
1478 | goto out_ida_remove; | |
1479 | ||
1480 | ret = nvmet_sq_init(&queue->nvme_sq); | |
1481 | if (ret) | |
1482 | goto out_free_connect; | |
1483 | ||
1484 | port->last_cpu = cpumask_next_wrap(port->last_cpu, | |
1485 | cpu_online_mask, -1, false); | |
1486 | queue->cpu = port->last_cpu; | |
1487 | nvmet_prepare_receive_pdu(queue); | |
1488 | ||
1489 | mutex_lock(&nvmet_tcp_queue_mutex); | |
1490 | list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); | |
1491 | mutex_unlock(&nvmet_tcp_queue_mutex); | |
1492 | ||
1493 | ret = nvmet_tcp_set_queue_sock(queue); | |
1494 | if (ret) | |
1495 | goto out_destroy_sq; | |
1496 | ||
1497 | queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work); | |
1498 | ||
1499 | return 0; | |
1500 | out_destroy_sq: | |
1501 | mutex_lock(&nvmet_tcp_queue_mutex); | |
1502 | list_del_init(&queue->queue_list); | |
1503 | mutex_unlock(&nvmet_tcp_queue_mutex); | |
1504 | nvmet_sq_destroy(&queue->nvme_sq); | |
1505 | out_free_connect: | |
1506 | nvmet_tcp_free_cmd(&queue->connect); | |
1507 | out_ida_remove: | |
1508 | ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); | |
1509 | out_free_queue: | |
1510 | kfree(queue); | |
1511 | return ret; | |
1512 | } | |
1513 | ||
1514 | static void nvmet_tcp_accept_work(struct work_struct *w) | |
1515 | { | |
1516 | struct nvmet_tcp_port *port = | |
1517 | container_of(w, struct nvmet_tcp_port, accept_work); | |
1518 | struct socket *newsock; | |
1519 | int ret; | |
1520 | ||
1521 | while (true) { | |
1522 | ret = kernel_accept(port->sock, &newsock, O_NONBLOCK); | |
1523 | if (ret < 0) { | |
1524 | if (ret != -EAGAIN) | |
1525 | pr_warn("failed to accept err=%d\n", ret); | |
1526 | return; | |
1527 | } | |
1528 | ret = nvmet_tcp_alloc_queue(port, newsock); | |
1529 | if (ret) { | |
1530 | pr_err("failed to allocate queue\n"); | |
1531 | sock_release(newsock); | |
1532 | } | |
1533 | } | |
1534 | } | |
1535 | ||
1536 | static void nvmet_tcp_listen_data_ready(struct sock *sk) | |
1537 | { | |
1538 | struct nvmet_tcp_port *port; | |
1539 | ||
1540 | read_lock_bh(&sk->sk_callback_lock); | |
1541 | port = sk->sk_user_data; | |
1542 | if (!port) | |
1543 | goto out; | |
1544 | ||
1545 | if (sk->sk_state == TCP_LISTEN) | |
1546 | schedule_work(&port->accept_work); | |
1547 | out: | |
1548 | read_unlock_bh(&sk->sk_callback_lock); | |
1549 | } | |
1550 | ||
1551 | static int nvmet_tcp_add_port(struct nvmet_port *nport) | |
1552 | { | |
1553 | struct nvmet_tcp_port *port; | |
1554 | __kernel_sa_family_t af; | |
1555 | int opt, ret; | |
1556 | ||
1557 | port = kzalloc(sizeof(*port), GFP_KERNEL); | |
1558 | if (!port) | |
1559 | return -ENOMEM; | |
1560 | ||
1561 | switch (nport->disc_addr.adrfam) { | |
1562 | case NVMF_ADDR_FAMILY_IP4: | |
1563 | af = AF_INET; | |
1564 | break; | |
1565 | case NVMF_ADDR_FAMILY_IP6: | |
1566 | af = AF_INET6; | |
1567 | break; | |
1568 | default: | |
1569 | pr_err("address family %d not supported\n", | |
1570 | nport->disc_addr.adrfam); | |
1571 | ret = -EINVAL; | |
1572 | goto err_port; | |
1573 | } | |
1574 | ||
1575 | ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, | |
1576 | nport->disc_addr.trsvcid, &port->addr); | |
1577 | if (ret) { | |
1578 | pr_err("malformed ip/port passed: %s:%s\n", | |
1579 | nport->disc_addr.traddr, nport->disc_addr.trsvcid); | |
1580 | goto err_port; | |
1581 | } | |
1582 | ||
1583 | port->nport = nport; | |
1584 | port->last_cpu = -1; | |
1585 | INIT_WORK(&port->accept_work, nvmet_tcp_accept_work); | |
1586 | if (port->nport->inline_data_size < 0) | |
1587 | port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE; | |
1588 | ||
1589 | ret = sock_create(port->addr.ss_family, SOCK_STREAM, | |
1590 | IPPROTO_TCP, &port->sock); | |
1591 | if (ret) { | |
1592 | pr_err("failed to create a socket\n"); | |
1593 | goto err_port; | |
1594 | } | |
1595 | ||
1596 | port->sock->sk->sk_user_data = port; | |
1597 | port->data_ready = port->sock->sk->sk_data_ready; | |
1598 | port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready; | |
1599 | ||
1600 | opt = 1; | |
1601 | ret = kernel_setsockopt(port->sock, IPPROTO_TCP, | |
1602 | TCP_NODELAY, (char *)&opt, sizeof(opt)); | |
1603 | if (ret) { | |
1604 | pr_err("failed to set TCP_NODELAY sock opt %d\n", ret); | |
1605 | goto err_sock; | |
1606 | } | |
1607 | ||
1608 | ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR, | |
1609 | (char *)&opt, sizeof(opt)); | |
1610 | if (ret) { | |
1611 | pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret); | |
1612 | goto err_sock; | |
1613 | } | |
1614 | ||
1615 | ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, | |
1616 | sizeof(port->addr)); | |
1617 | if (ret) { | |
1618 | pr_err("failed to bind port socket %d\n", ret); | |
1619 | goto err_sock; | |
1620 | } | |
1621 | ||
1622 | ret = kernel_listen(port->sock, 128); | |
1623 | if (ret) { | |
1624 | pr_err("failed to listen %d on port sock\n", ret); | |
1625 | goto err_sock; | |
1626 | } | |
1627 | ||
1628 | nport->priv = port; | |
1629 | pr_info("enabling port %d (%pISpc)\n", | |
1630 | le16_to_cpu(nport->disc_addr.portid), &port->addr); | |
1631 | ||
1632 | return 0; | |
1633 | ||
1634 | err_sock: | |
1635 | sock_release(port->sock); | |
1636 | err_port: | |
1637 | kfree(port); | |
1638 | return ret; | |
1639 | } | |
1640 | ||
1641 | static void nvmet_tcp_remove_port(struct nvmet_port *nport) | |
1642 | { | |
1643 | struct nvmet_tcp_port *port = nport->priv; | |
1644 | ||
1645 | write_lock_bh(&port->sock->sk->sk_callback_lock); | |
1646 | port->sock->sk->sk_data_ready = port->data_ready; | |
1647 | port->sock->sk->sk_user_data = NULL; | |
1648 | write_unlock_bh(&port->sock->sk->sk_callback_lock); | |
1649 | cancel_work_sync(&port->accept_work); | |
1650 | ||
1651 | sock_release(port->sock); | |
1652 | kfree(port); | |
1653 | } | |
1654 | ||
1655 | static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl *ctrl) | |
1656 | { | |
1657 | struct nvmet_tcp_queue *queue; | |
1658 | ||
1659 | mutex_lock(&nvmet_tcp_queue_mutex); | |
1660 | list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) | |
1661 | if (queue->nvme_sq.ctrl == ctrl) | |
1662 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
1663 | mutex_unlock(&nvmet_tcp_queue_mutex); | |
1664 | } | |
1665 | ||
1666 | static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) | |
1667 | { | |
1668 | struct nvmet_tcp_queue *queue = | |
1669 | container_of(sq, struct nvmet_tcp_queue, nvme_sq); | |
1670 | ||
1671 | if (sq->qid == 0) { | |
1672 | /* Let inflight controller teardown complete */ | |
1673 | flush_scheduled_work(); | |
1674 | } | |
1675 | ||
1676 | queue->nr_cmds = sq->size * 2; | |
1677 | if (nvmet_tcp_alloc_cmds(queue)) | |
1678 | return NVME_SC_INTERNAL; | |
1679 | return 0; | |
1680 | } | |
1681 | ||
1682 | static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, | |
1683 | struct nvmet_port *nport, char *traddr) | |
1684 | { | |
1685 | struct nvmet_tcp_port *port = nport->priv; | |
1686 | ||
1687 | if (inet_addr_is_any((struct sockaddr *)&port->addr)) { | |
1688 | struct nvmet_tcp_cmd *cmd = | |
1689 | container_of(req, struct nvmet_tcp_cmd, req); | |
1690 | struct nvmet_tcp_queue *queue = cmd->queue; | |
1691 | ||
1692 | sprintf(traddr, "%pISc", (struct sockaddr *)&queue->sockaddr); | |
1693 | } else { | |
1694 | memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); | |
1695 | } | |
1696 | } | |
1697 | ||
1698 | static struct nvmet_fabrics_ops nvmet_tcp_ops = { | |
1699 | .owner = THIS_MODULE, | |
1700 | .type = NVMF_TRTYPE_TCP, | |
1701 | .msdbd = 1, | |
1702 | .has_keyed_sgls = 0, | |
1703 | .add_port = nvmet_tcp_add_port, | |
1704 | .remove_port = nvmet_tcp_remove_port, | |
1705 | .queue_response = nvmet_tcp_queue_response, | |
1706 | .delete_ctrl = nvmet_tcp_delete_ctrl, | |
1707 | .install_queue = nvmet_tcp_install_queue, | |
1708 | .disc_traddr = nvmet_tcp_disc_port_addr, | |
1709 | }; | |
1710 | ||
1711 | static int __init nvmet_tcp_init(void) | |
1712 | { | |
1713 | int ret; | |
1714 | ||
1715 | nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0); | |
1716 | if (!nvmet_tcp_wq) | |
1717 | return -ENOMEM; | |
1718 | ||
1719 | ret = nvmet_register_transport(&nvmet_tcp_ops); | |
1720 | if (ret) | |
1721 | goto err; | |
1722 | ||
1723 | return 0; | |
1724 | err: | |
1725 | destroy_workqueue(nvmet_tcp_wq); | |
1726 | return ret; | |
1727 | } | |
1728 | ||
1729 | static void __exit nvmet_tcp_exit(void) | |
1730 | { | |
1731 | struct nvmet_tcp_queue *queue; | |
1732 | ||
1733 | nvmet_unregister_transport(&nvmet_tcp_ops); | |
1734 | ||
1735 | flush_scheduled_work(); | |
1736 | mutex_lock(&nvmet_tcp_queue_mutex); | |
1737 | list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) | |
1738 | kernel_sock_shutdown(queue->sock, SHUT_RDWR); | |
1739 | mutex_unlock(&nvmet_tcp_queue_mutex); | |
1740 | flush_scheduled_work(); | |
1741 | ||
1742 | destroy_workqueue(nvmet_tcp_wq); | |
1743 | } | |
1744 | ||
1745 | module_init(nvmet_tcp_init); | |
1746 | module_exit(nvmet_tcp_exit); | |
1747 | ||
1748 | MODULE_LICENSE("GPL v2"); | |
1749 | MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */ |