Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. | |
3 | * | |
4 | * Copyright (c) 2012, Intel Corporation. | |
5 | * | |
6 | * Author: Zach Brown <zab@zabbo.net> | |
7 | * Author: Peter J. Braam <braam@clusterfs.com> | |
8 | * Author: Phil Schwan <phil@clusterfs.com> | |
9 | * Author: Eric Barton <eric@bartonsoftware.com> | |
10 | * | |
11 | * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ | |
12 | * | |
13 | * Portals is free software; you can redistribute it and/or | |
14 | * modify it under the terms of version 2 of the GNU General Public | |
15 | * License as published by the Free Software Foundation. | |
16 | * | |
17 | * Portals is distributed in the hope that it will be useful, | |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU General Public License for more details. | |
21 | * | |
d7e09d03 PT |
22 | */ |
23 | ||
24 | #include "socklnd.h" | |
25 | ||
26 | /* | |
27 | * Protocol entries : | |
28 | * pro_send_hello : send hello message | |
29 | * pro_recv_hello : receive hello message | |
30 | * pro_pack : pack message header | |
31 | * pro_unpack : unpack message header | |
32 | * pro_queue_tx_zcack() : Called holding BH lock: kss_lock | |
33 | * return 1 if ACK is piggybacked, otherwise return 0 | |
34 | * pro_queue_tx_msg() : Called holding BH lock: kss_lock | |
35 | * return the ACK (NOOP tx) that was piggybacked by my message, or NULL | |
36 | * pro_handle_zcreq() : handler of incoming ZC-REQ | |
37 | * pro_handle_zcack() : handler of incoming ZC-ACK | |
38 | * pro_match_tx() : Called holding glock | |
39 | */ | |
40 | ||
ff13fd40 JS |
41 | static struct ksock_tx * |
42 | ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg) | |
d7e09d03 PT |
43 | { |
44 | /* V1.x, just enqueue it */ | |
45 | list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); | |
46 | return NULL; | |
47 | } | |
48 | ||
49 | void | |
ff13fd40 | 50 | ksocknal_next_tx_carrier(struct ksock_conn *conn) |
d7e09d03 | 51 | { |
ff13fd40 | 52 | struct ksock_tx *tx = conn->ksnc_tx_carrier; |
d7e09d03 PT |
53 | |
54 | /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ | |
d3422d5e | 55 | LASSERT(!list_empty(&conn->ksnc_tx_queue)); |
06ace26e | 56 | LASSERT(tx); |
d7e09d03 PT |
57 | |
58 | /* Next TX that can carry ZC-ACK or LNet message */ | |
59 | if (tx->tx_list.next == &conn->ksnc_tx_queue) { | |
60 | /* no more packets queued */ | |
61 | conn->ksnc_tx_carrier = NULL; | |
62 | } else { | |
8f5b1435 | 63 | conn->ksnc_tx_carrier = list_next_entry(tx, tx_list); |
d3422d5e | 64 | LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type); |
d7e09d03 PT |
65 | } |
66 | } | |
67 | ||
68 | static int | |
ff13fd40 JS |
69 | ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn, |
70 | struct ksock_tx *tx_ack, __u64 cookie) | |
d7e09d03 | 71 | { |
ff13fd40 | 72 | struct ksock_tx *tx = conn->ksnc_tx_carrier; |
d7e09d03 | 73 | |
06ace26e | 74 | LASSERT(!tx_ack || |
c314c319 | 75 | tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); |
d7e09d03 PT |
76 | |
77 | /* | |
78 | * Enqueue or piggyback tx_ack / cookie | |
79 | * . no tx can piggyback cookie of tx_ack (or cookie), just | |
80 | * enqueue the tx_ack (if tx_ack != NUL) and return NULL. | |
81 | * . There is tx can piggyback cookie of tx_ack (or cookie), | |
82 | * piggyback the cookie and return the tx. | |
83 | */ | |
06ace26e JS |
84 | if (!tx) { |
85 | if (tx_ack) { | |
d7e09d03 | 86 | list_add_tail(&tx_ack->tx_list, |
c314c319 | 87 | &conn->ksnc_tx_queue); |
d7e09d03 PT |
88 | conn->ksnc_tx_carrier = tx_ack; |
89 | } | |
90 | return 0; | |
91 | } | |
92 | ||
93 | if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { | |
94 | /* tx is noop zc-ack, can't piggyback zc-ack cookie */ | |
06ace26e | 95 | if (tx_ack) |
d7e09d03 | 96 | list_add_tail(&tx_ack->tx_list, |
c314c319 | 97 | &conn->ksnc_tx_queue); |
d7e09d03 PT |
98 | return 0; |
99 | } | |
100 | ||
101 | LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET); | |
5fd88337 | 102 | LASSERT(!tx->tx_msg.ksm_zc_cookies[1]); |
d7e09d03 | 103 | |
06ace26e | 104 | if (tx_ack) |
d7e09d03 PT |
105 | cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; |
106 | ||
107 | /* piggyback the zc-ack cookie */ | |
108 | tx->tx_msg.ksm_zc_cookies[1] = cookie; | |
109 | /* move on to the next TX which can carry cookie */ | |
110 | ksocknal_next_tx_carrier(conn); | |
111 | ||
112 | return 1; | |
113 | } | |
114 | ||
ff13fd40 JS |
115 | static struct ksock_tx * |
116 | ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg) | |
d7e09d03 | 117 | { |
ff13fd40 | 118 | struct ksock_tx *tx = conn->ksnc_tx_carrier; |
d7e09d03 PT |
119 | |
120 | /* | |
121 | * Enqueue tx_msg: | |
122 | * . If there is no NOOP on the connection, just enqueue | |
123 | * tx_msg and return NULL | |
124 | * . If there is NOOP on the connection, piggyback the cookie | |
125 | * and replace the NOOP tx, and return the NOOP tx. | |
126 | */ | |
06ace26e | 127 | if (!tx) { /* nothing on queue */ |
d7e09d03 PT |
128 | list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); |
129 | conn->ksnc_tx_carrier = tx_msg; | |
130 | return NULL; | |
131 | } | |
132 | ||
133 | if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */ | |
134 | list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue); | |
135 | return NULL; | |
136 | } | |
137 | ||
d3422d5e | 138 | LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); |
d7e09d03 PT |
139 | |
140 | /* There is a noop zc-ack can be piggybacked */ | |
141 | tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1]; | |
142 | ksocknal_next_tx_carrier(conn); | |
143 | ||
144 | /* use new_tx to replace the noop zc-ack packet */ | |
145 | list_add(&tx_msg->tx_list, &tx->tx_list); | |
146 | list_del(&tx->tx_list); | |
147 | ||
148 | return tx; | |
149 | } | |
150 | ||
151 | static int | |
ff13fd40 JS |
152 | ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn, |
153 | struct ksock_tx *tx_ack, __u64 cookie) | |
d7e09d03 | 154 | { |
ff13fd40 | 155 | struct ksock_tx *tx; |
d7e09d03 PT |
156 | |
157 | if (conn->ksnc_type != SOCKLND_CONN_ACK) | |
158 | return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie); | |
159 | ||
160 | /* non-blocking ZC-ACK (to router) */ | |
06ace26e | 161 | LASSERT(!tx_ack || |
c314c319 | 162 | tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP); |
d7e09d03 | 163 | |
4a87df3e | 164 | tx = conn->ksnc_tx_carrier; |
06ace26e JS |
165 | if (!tx) { |
166 | if (tx_ack) { | |
d7e09d03 | 167 | list_add_tail(&tx_ack->tx_list, |
c314c319 | 168 | &conn->ksnc_tx_queue); |
d7e09d03 PT |
169 | conn->ksnc_tx_carrier = tx_ack; |
170 | } | |
171 | return 0; | |
172 | } | |
173 | ||
06ace26e | 174 | /* conn->ksnc_tx_carrier */ |
d7e09d03 | 175 | |
06ace26e | 176 | if (tx_ack) |
d7e09d03 PT |
177 | cookie = tx_ack->tx_msg.ksm_zc_cookies[1]; |
178 | ||
179 | if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */ | |
180 | return 1; | |
181 | ||
182 | if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) { | |
183 | /* replace the keepalive PING with a real ACK */ | |
5fd88337 | 184 | LASSERT(!tx->tx_msg.ksm_zc_cookies[0]); |
d7e09d03 PT |
185 | tx->tx_msg.ksm_zc_cookies[1] = cookie; |
186 | return 1; | |
187 | } | |
188 | ||
189 | if (cookie == tx->tx_msg.ksm_zc_cookies[0] || | |
190 | cookie == tx->tx_msg.ksm_zc_cookies[1]) { | |
b0f5aad5 | 191 | CWARN("%s: duplicated ZC cookie: %llu\n", |
d7e09d03 PT |
192 | libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); |
193 | return 1; /* XXX return error in the future */ | |
194 | } | |
195 | ||
5fd88337 | 196 | if (!tx->tx_msg.ksm_zc_cookies[0]) { |
d7e09d03 PT |
197 | /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */ |
198 | if (tx->tx_msg.ksm_zc_cookies[1] > cookie) { | |
199 | tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1]; | |
200 | tx->tx_msg.ksm_zc_cookies[1] = cookie; | |
201 | } else { | |
202 | tx->tx_msg.ksm_zc_cookies[0] = cookie; | |
203 | } | |
204 | ||
205 | if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) { | |
206 | /* not likely to carry more ACKs, skip it to simplify logic */ | |
207 | ksocknal_next_tx_carrier(conn); | |
208 | } | |
209 | ||
210 | return 1; | |
211 | } | |
212 | ||
213 | /* takes two or more cookies already */ | |
214 | ||
215 | if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) { | |
216 | __u64 tmp = 0; | |
217 | ||
2b284326 | 218 | /* two separated cookies: (a+2, a) or (a+1, a) */ |
d3422d5e | 219 | LASSERT(tx->tx_msg.ksm_zc_cookies[0] - |
d7e09d03 PT |
220 | tx->tx_msg.ksm_zc_cookies[1] <= 2); |
221 | ||
222 | if (tx->tx_msg.ksm_zc_cookies[0] - | |
223 | tx->tx_msg.ksm_zc_cookies[1] == 2) { | |
224 | if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) | |
225 | tmp = cookie; | |
226 | } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) { | |
227 | tmp = tx->tx_msg.ksm_zc_cookies[1]; | |
228 | } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) { | |
229 | tmp = tx->tx_msg.ksm_zc_cookies[0]; | |
230 | } | |
231 | ||
5fd88337 | 232 | if (tmp) { |
d7e09d03 PT |
233 | /* range of cookies */ |
234 | tx->tx_msg.ksm_zc_cookies[0] = tmp - 1; | |
235 | tx->tx_msg.ksm_zc_cookies[1] = tmp + 1; | |
236 | return 1; | |
237 | } | |
238 | ||
239 | } else { | |
240 | /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */ | |
241 | if (cookie >= tx->tx_msg.ksm_zc_cookies[0] && | |
242 | cookie <= tx->tx_msg.ksm_zc_cookies[1]) { | |
b0f5aad5 | 243 | CWARN("%s: duplicated ZC cookie: %llu\n", |
d7e09d03 PT |
244 | libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie); |
245 | return 1; /* XXX: return error in the future */ | |
246 | } | |
247 | ||
248 | if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) { | |
249 | tx->tx_msg.ksm_zc_cookies[1] = cookie; | |
250 | return 1; | |
251 | } | |
252 | ||
253 | if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) { | |
254 | tx->tx_msg.ksm_zc_cookies[0] = cookie; | |
255 | return 1; | |
256 | } | |
257 | } | |
258 | ||
259 | /* failed to piggyback ZC-ACK */ | |
06ace26e | 260 | if (tx_ack) { |
d7e09d03 PT |
261 | list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue); |
262 | /* the next tx can piggyback at least 1 ACK */ | |
263 | ksocknal_next_tx_carrier(conn); | |
264 | } | |
265 | ||
266 | return 0; | |
267 | } | |
268 | ||
269 | static int | |
ff13fd40 | 270 | ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) |
d7e09d03 PT |
271 | { |
272 | int nob; | |
273 | ||
274 | #if SOCKNAL_VERSION_DEBUG | |
275 | if (!*ksocknal_tunables.ksnd_typed_conns) | |
276 | return SOCKNAL_MATCH_YES; | |
277 | #endif | |
278 | ||
06ace26e | 279 | if (!tx || !tx->tx_lnetmsg) { |
d7e09d03 PT |
280 | /* noop packet */ |
281 | nob = offsetof(ksock_msg_t, ksm_u); | |
282 | } else { | |
283 | nob = tx->tx_lnetmsg->msg_len + | |
284 | ((conn->ksnc_proto == &ksocknal_protocol_v1x) ? | |
285 | sizeof(lnet_hdr_t) : sizeof(ksock_msg_t)); | |
286 | } | |
287 | ||
288 | /* default checking for typed connection */ | |
289 | switch (conn->ksnc_type) { | |
290 | default: | |
291 | CERROR("ksnc_type bad: %u\n", conn->ksnc_type); | |
292 | LBUG(); | |
293 | case SOCKLND_CONN_ANY: | |
294 | return SOCKNAL_MATCH_YES; | |
295 | ||
296 | case SOCKLND_CONN_BULK_IN: | |
297 | return SOCKNAL_MATCH_MAY; | |
298 | ||
299 | case SOCKLND_CONN_BULK_OUT: | |
300 | if (nob < *ksocknal_tunables.ksnd_min_bulk) | |
301 | return SOCKNAL_MATCH_MAY; | |
302 | else | |
303 | return SOCKNAL_MATCH_YES; | |
304 | ||
305 | case SOCKLND_CONN_CONTROL: | |
306 | if (nob >= *ksocknal_tunables.ksnd_min_bulk) | |
307 | return SOCKNAL_MATCH_MAY; | |
308 | else | |
309 | return SOCKNAL_MATCH_YES; | |
310 | } | |
311 | } | |
312 | ||
313 | static int | |
ff13fd40 | 314 | ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk) |
d7e09d03 PT |
315 | { |
316 | int nob; | |
317 | ||
06ace26e | 318 | if (!tx || !tx->tx_lnetmsg) |
d7e09d03 PT |
319 | nob = offsetof(ksock_msg_t, ksm_u); |
320 | else | |
321 | nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t); | |
322 | ||
323 | switch (conn->ksnc_type) { | |
324 | default: | |
325 | CERROR("ksnc_type bad: %u\n", conn->ksnc_type); | |
326 | LBUG(); | |
327 | case SOCKLND_CONN_ANY: | |
328 | return SOCKNAL_MATCH_NO; | |
329 | ||
330 | case SOCKLND_CONN_ACK: | |
331 | if (nonblk) | |
332 | return SOCKNAL_MATCH_YES; | |
06ace26e | 333 | else if (!tx || !tx->tx_lnetmsg) |
d7e09d03 PT |
334 | return SOCKNAL_MATCH_MAY; |
335 | else | |
336 | return SOCKNAL_MATCH_NO; | |
337 | ||
338 | case SOCKLND_CONN_BULK_OUT: | |
339 | if (nonblk) | |
340 | return SOCKNAL_MATCH_NO; | |
341 | else if (nob < *ksocknal_tunables.ksnd_min_bulk) | |
342 | return SOCKNAL_MATCH_MAY; | |
343 | else | |
344 | return SOCKNAL_MATCH_YES; | |
345 | ||
346 | case SOCKLND_CONN_CONTROL: | |
347 | if (nonblk) | |
348 | return SOCKNAL_MATCH_NO; | |
349 | else if (nob >= *ksocknal_tunables.ksnd_min_bulk) | |
350 | return SOCKNAL_MATCH_MAY; | |
351 | else | |
352 | return SOCKNAL_MATCH_YES; | |
353 | } | |
354 | } | |
355 | ||
356 | /* (Sink) handle incoming ZC request from sender */ | |
357 | static int | |
ff13fd40 | 358 | ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote) |
d7e09d03 | 359 | { |
ff13fd40 JS |
360 | struct ksock_peer *peer = c->ksnc_peer; |
361 | struct ksock_conn *conn; | |
362 | struct ksock_tx *tx; | |
97d10d0a | 363 | int rc; |
d7e09d03 PT |
364 | |
365 | read_lock(&ksocknal_data.ksnd_global_lock); | |
366 | ||
367 | conn = ksocknal_find_conn_locked(peer, NULL, !!remote); | |
06ace26e | 368 | if (conn) { |
ff13fd40 | 369 | struct ksock_sched *sched = conn->ksnc_scheduler; |
d7e09d03 | 370 | |
06ace26e | 371 | LASSERT(conn->ksnc_proto->pro_queue_tx_zcack); |
d7e09d03 PT |
372 | |
373 | spin_lock_bh(&sched->kss_lock); | |
374 | ||
375 | rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie); | |
376 | ||
377 | spin_unlock_bh(&sched->kss_lock); | |
378 | ||
379 | if (rc) { /* piggybacked */ | |
380 | read_unlock(&ksocknal_data.ksnd_global_lock); | |
381 | return 0; | |
382 | } | |
383 | } | |
384 | ||
385 | read_unlock(&ksocknal_data.ksnd_global_lock); | |
386 | ||
387 | /* ACK connection is not ready, or can't piggyback the ACK */ | |
388 | tx = ksocknal_alloc_tx_noop(cookie, !!remote); | |
06ace26e | 389 | if (!tx) |
d7e09d03 PT |
390 | return -ENOMEM; |
391 | ||
4a87df3e | 392 | rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id); |
5fd88337 | 393 | if (!rc) |
d7e09d03 PT |
394 | return 0; |
395 | ||
396 | ksocknal_free_tx(tx); | |
397 | return rc; | |
398 | } | |
399 | ||
400 | /* (Sender) handle ZC_ACK from sink */ | |
401 | static int | |
ff13fd40 | 402 | ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2) |
d7e09d03 | 403 | { |
ff13fd40 JS |
404 | struct ksock_peer *peer = conn->ksnc_peer; |
405 | struct ksock_tx *tx; | |
406 | struct ksock_tx *temp; | |
407 | struct ksock_tx *tmp; | |
d3422d5e | 408 | LIST_HEAD(zlist); |
97d10d0a | 409 | int count; |
d7e09d03 | 410 | |
5fd88337 | 411 | if (!cookie1) |
d7e09d03 PT |
412 | cookie1 = cookie2; |
413 | ||
414 | count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1); | |
415 | ||
416 | if (cookie2 == SOCKNAL_KEEPALIVE_PING && | |
417 | conn->ksnc_proto == &ksocknal_protocol_v3x) { | |
418 | /* keepalive PING for V3.x, just ignore it */ | |
419 | return count == 1 ? 0 : -EPROTO; | |
420 | } | |
421 | ||
422 | spin_lock(&peer->ksnp_lock); | |
423 | ||
c314c319 JS |
424 | list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, |
425 | tx_zc_list) { | |
d7e09d03 PT |
426 | __u64 c = tx->tx_msg.ksm_zc_cookies[0]; |
427 | ||
428 | if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) { | |
429 | tx->tx_msg.ksm_zc_cookies[0] = 0; | |
430 | list_del(&tx->tx_zc_list); | |
431 | list_add(&tx->tx_zc_list, &zlist); | |
432 | ||
5fd88337 | 433 | if (!--count) |
d7e09d03 PT |
434 | break; |
435 | } | |
436 | } | |
437 | ||
438 | spin_unlock(&peer->ksnp_lock); | |
439 | ||
1edae04f | 440 | list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) { |
d7e09d03 PT |
441 | list_del(&tx->tx_zc_list); |
442 | ksocknal_tx_decref(tx); | |
443 | } | |
444 | ||
5fd88337 | 445 | return !count ? 0 : -EPROTO; |
d7e09d03 PT |
446 | } |
447 | ||
448 | static int | |
ff13fd40 | 449 | ksocknal_send_hello_v1(struct ksock_conn *conn, ksock_hello_msg_t *hello) |
d7e09d03 | 450 | { |
97d10d0a MS |
451 | struct socket *sock = conn->ksnc_sock; |
452 | lnet_hdr_t *hdr; | |
d7e09d03 | 453 | lnet_magicversion_t *hmv; |
97d10d0a MS |
454 | int rc; |
455 | int i; | |
d7e09d03 PT |
456 | |
457 | CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid)); | |
458 | ||
459 | LIBCFS_ALLOC(hdr, sizeof(*hdr)); | |
06ace26e | 460 | if (!hdr) { |
d7e09d03 PT |
461 | CERROR("Can't allocate lnet_hdr_t\n"); |
462 | return -ENOMEM; | |
463 | } | |
464 | ||
465 | hmv = (lnet_magicversion_t *)&hdr->dest_nid; | |
466 | ||
4420cfd3 JS |
467 | /* |
468 | * Re-organize V2.x message header to V1.x (lnet_hdr_t) | |
469 | * header and send out | |
470 | */ | |
b31e64c4 JS |
471 | hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC); |
472 | hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR); | |
473 | hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR); | |
d7e09d03 | 474 | |
5fd88337 | 475 | if (the_lnet.ln_testprotocompat) { |
d7e09d03 PT |
476 | /* single-shot proto check */ |
477 | LNET_LOCK(); | |
5fd88337 | 478 | if (the_lnet.ln_testprotocompat & 1) { |
d7e09d03 PT |
479 | hmv->version_major++; /* just different! */ |
480 | the_lnet.ln_testprotocompat &= ~1; | |
481 | } | |
5fd88337 | 482 | if (the_lnet.ln_testprotocompat & 2) { |
d7e09d03 PT |
483 | hmv->magic = LNET_PROTO_MAGIC; |
484 | the_lnet.ln_testprotocompat &= ~2; | |
485 | } | |
486 | LNET_UNLOCK(); | |
487 | } | |
488 | ||
b31e64c4 JS |
489 | hdr->src_nid = cpu_to_le64(hello->kshm_src_nid); |
490 | hdr->src_pid = cpu_to_le32(hello->kshm_src_pid); | |
491 | hdr->type = cpu_to_le32(LNET_MSG_HELLO); | |
492 | hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32)); | |
493 | hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype); | |
494 | hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation); | |
d7e09d03 | 495 | |
1ad6a73e | 496 | rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); |
5fd88337 | 497 | if (rc) { |
5e8f6920 PT |
498 | CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", |
499 | rc, &conn->ksnc_ipaddr, conn->ksnc_port); | |
d7e09d03 PT |
500 | goto out; |
501 | } | |
502 | ||
5fd88337 | 503 | if (!hello->kshm_nips) |
d7e09d03 PT |
504 | goto out; |
505 | ||
9797fb0e | 506 | for (i = 0; i < (int)hello->kshm_nips; i++) |
b31e64c4 | 507 | hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]); |
d7e09d03 | 508 | |
1ad6a73e JS |
509 | rc = lnet_sock_write(sock, hello->kshm_ips, |
510 | hello->kshm_nips * sizeof(__u32), | |
511 | lnet_acceptor_timeout()); | |
5fd88337 | 512 | if (rc) { |
2d00bd17 JP |
513 | CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", |
514 | rc, hello->kshm_nips, | |
5e8f6920 | 515 | &conn->ksnc_ipaddr, conn->ksnc_port); |
d7e09d03 PT |
516 | } |
517 | out: | |
518 | LIBCFS_FREE(hdr, sizeof(*hdr)); | |
519 | ||
520 | return rc; | |
521 | } | |
522 | ||
523 | static int | |
ff13fd40 | 524 | ksocknal_send_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello) |
d7e09d03 | 525 | { |
e327dc88 | 526 | struct socket *sock = conn->ksnc_sock; |
97d10d0a | 527 | int rc; |
d7e09d03 PT |
528 | |
529 | hello->kshm_magic = LNET_PROTO_MAGIC; | |
530 | hello->kshm_version = conn->ksnc_proto->pro_version; | |
531 | ||
5fd88337 | 532 | if (the_lnet.ln_testprotocompat) { |
d7e09d03 PT |
533 | /* single-shot proto check */ |
534 | LNET_LOCK(); | |
5fd88337 | 535 | if (the_lnet.ln_testprotocompat & 1) { |
d7e09d03 PT |
536 | hello->kshm_version++; /* just different! */ |
537 | the_lnet.ln_testprotocompat &= ~1; | |
538 | } | |
539 | LNET_UNLOCK(); | |
540 | } | |
541 | ||
1ad6a73e JS |
542 | rc = lnet_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips), |
543 | lnet_acceptor_timeout()); | |
5fd88337 | 544 | if (rc) { |
5e8f6920 PT |
545 | CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n", |
546 | rc, &conn->ksnc_ipaddr, conn->ksnc_port); | |
d7e09d03 PT |
547 | return rc; |
548 | } | |
549 | ||
5fd88337 | 550 | if (!hello->kshm_nips) |
d7e09d03 PT |
551 | return 0; |
552 | ||
1ad6a73e JS |
553 | rc = lnet_sock_write(sock, hello->kshm_ips, |
554 | hello->kshm_nips * sizeof(__u32), | |
555 | lnet_acceptor_timeout()); | |
5fd88337 | 556 | if (rc) { |
2d00bd17 JP |
557 | CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n", |
558 | rc, hello->kshm_nips, | |
5e8f6920 | 559 | &conn->ksnc_ipaddr, conn->ksnc_port); |
d7e09d03 PT |
560 | } |
561 | ||
562 | return rc; | |
563 | } | |
564 | ||
565 | static int | |
ff13fd40 | 566 | ksocknal_recv_hello_v1(struct ksock_conn *conn, ksock_hello_msg_t *hello, |
1d8cb70c | 567 | int timeout) |
d7e09d03 | 568 | { |
97d10d0a MS |
569 | struct socket *sock = conn->ksnc_sock; |
570 | lnet_hdr_t *hdr; | |
571 | int rc; | |
572 | int i; | |
d7e09d03 PT |
573 | |
574 | LIBCFS_ALLOC(hdr, sizeof(*hdr)); | |
06ace26e | 575 | if (!hdr) { |
d7e09d03 PT |
576 | CERROR("Can't allocate lnet_hdr_t\n"); |
577 | return -ENOMEM; | |
578 | } | |
579 | ||
1ad6a73e JS |
580 | rc = lnet_sock_read(sock, &hdr->src_nid, |
581 | sizeof(*hdr) - offsetof(lnet_hdr_t, src_nid), | |
582 | timeout); | |
5fd88337 | 583 | if (rc) { |
5e8f6920 | 584 | CERROR("Error %d reading rest of HELLO hdr from %pI4h\n", |
c314c319 | 585 | rc, &conn->ksnc_ipaddr); |
d3422d5e | 586 | LASSERT(rc < 0 && rc != -EALREADY); |
d7e09d03 PT |
587 | goto out; |
588 | } | |
589 | ||
590 | /* ...and check we got what we expected */ | |
b31e64c4 | 591 | if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) { |
2d00bd17 JP |
592 | CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n", |
593 | le32_to_cpu(hdr->type), | |
594 | &conn->ksnc_ipaddr); | |
d7e09d03 PT |
595 | rc = -EPROTO; |
596 | goto out; | |
597 | } | |
598 | ||
97d10d0a MS |
599 | hello->kshm_src_nid = le64_to_cpu(hdr->src_nid); |
600 | hello->kshm_src_pid = le32_to_cpu(hdr->src_pid); | |
d3422d5e | 601 | hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation); |
97d10d0a MS |
602 | hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type); |
603 | hello->kshm_nips = le32_to_cpu(hdr->payload_length) / | |
604 | sizeof(__u32); | |
d7e09d03 PT |
605 | |
606 | if (hello->kshm_nips > LNET_MAX_INTERFACES) { | |
5e8f6920 PT |
607 | CERROR("Bad nips %d from ip %pI4h\n", |
608 | hello->kshm_nips, &conn->ksnc_ipaddr); | |
d7e09d03 PT |
609 | rc = -EPROTO; |
610 | goto out; | |
611 | } | |
612 | ||
5fd88337 | 613 | if (!hello->kshm_nips) |
d7e09d03 PT |
614 | goto out; |
615 | ||
1ad6a73e JS |
616 | rc = lnet_sock_read(sock, hello->kshm_ips, |
617 | hello->kshm_nips * sizeof(__u32), timeout); | |
5fd88337 | 618 | if (rc) { |
5e8f6920 | 619 | CERROR("Error %d reading IPs from ip %pI4h\n", |
c314c319 | 620 | rc, &conn->ksnc_ipaddr); |
5e8f6920 | 621 | LASSERT(rc < 0 && rc != -EALREADY); |
d7e09d03 PT |
622 | goto out; |
623 | } | |
624 | ||
9797fb0e | 625 | for (i = 0; i < (int)hello->kshm_nips; i++) { |
d7e09d03 PT |
626 | hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); |
627 | ||
5fd88337 | 628 | if (!hello->kshm_ips[i]) { |
5e8f6920 PT |
629 | CERROR("Zero IP[%d] from ip %pI4h\n", |
630 | i, &conn->ksnc_ipaddr); | |
d7e09d03 PT |
631 | rc = -EPROTO; |
632 | break; | |
633 | } | |
634 | } | |
635 | out: | |
636 | LIBCFS_FREE(hdr, sizeof(*hdr)); | |
637 | ||
638 | return rc; | |
639 | } | |
640 | ||
641 | static int | |
ff13fd40 | 642 | ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello, int timeout) |
d7e09d03 | 643 | { |
97d10d0a MS |
644 | struct socket *sock = conn->ksnc_sock; |
645 | int rc; | |
646 | int i; | |
d7e09d03 PT |
647 | |
648 | if (hello->kshm_magic == LNET_PROTO_MAGIC) | |
649 | conn->ksnc_flip = 0; | |
650 | else | |
651 | conn->ksnc_flip = 1; | |
652 | ||
1ad6a73e JS |
653 | rc = lnet_sock_read(sock, &hello->kshm_src_nid, |
654 | offsetof(ksock_hello_msg_t, kshm_ips) - | |
655 | offsetof(ksock_hello_msg_t, kshm_src_nid), | |
656 | timeout); | |
5fd88337 | 657 | if (rc) { |
5e8f6920 | 658 | CERROR("Error %d reading HELLO from %pI4h\n", |
c314c319 | 659 | rc, &conn->ksnc_ipaddr); |
5e8f6920 | 660 | LASSERT(rc < 0 && rc != -EALREADY); |
d7e09d03 PT |
661 | return rc; |
662 | } | |
663 | ||
664 | if (conn->ksnc_flip) { | |
665 | __swab32s(&hello->kshm_src_pid); | |
666 | __swab64s(&hello->kshm_src_nid); | |
667 | __swab32s(&hello->kshm_dst_pid); | |
668 | __swab64s(&hello->kshm_dst_nid); | |
669 | __swab64s(&hello->kshm_src_incarnation); | |
670 | __swab64s(&hello->kshm_dst_incarnation); | |
671 | __swab32s(&hello->kshm_ctype); | |
672 | __swab32s(&hello->kshm_nips); | |
673 | } | |
674 | ||
675 | if (hello->kshm_nips > LNET_MAX_INTERFACES) { | |
5e8f6920 PT |
676 | CERROR("Bad nips %d from ip %pI4h\n", |
677 | hello->kshm_nips, &conn->ksnc_ipaddr); | |
d7e09d03 PT |
678 | return -EPROTO; |
679 | } | |
680 | ||
5fd88337 | 681 | if (!hello->kshm_nips) |
d7e09d03 PT |
682 | return 0; |
683 | ||
1ad6a73e JS |
684 | rc = lnet_sock_read(sock, hello->kshm_ips, |
685 | hello->kshm_nips * sizeof(__u32), timeout); | |
5fd88337 | 686 | if (rc) { |
5e8f6920 | 687 | CERROR("Error %d reading IPs from ip %pI4h\n", |
c314c319 | 688 | rc, &conn->ksnc_ipaddr); |
5e8f6920 | 689 | LASSERT(rc < 0 && rc != -EALREADY); |
d7e09d03 PT |
690 | return rc; |
691 | } | |
692 | ||
9797fb0e | 693 | for (i = 0; i < (int)hello->kshm_nips; i++) { |
d7e09d03 PT |
694 | if (conn->ksnc_flip) |
695 | __swab32s(&hello->kshm_ips[i]); | |
696 | ||
5fd88337 | 697 | if (!hello->kshm_ips[i]) { |
5e8f6920 PT |
698 | CERROR("Zero IP[%d] from ip %pI4h\n", |
699 | i, &conn->ksnc_ipaddr); | |
d7e09d03 PT |
700 | return -EPROTO; |
701 | } | |
702 | } | |
703 | ||
704 | return 0; | |
705 | } | |
706 | ||
707 | static void | |
ff13fd40 | 708 | ksocknal_pack_msg_v1(struct ksock_tx *tx) |
d7e09d03 PT |
709 | { |
710 | /* V1.x has no KSOCK_MSG_NOOP */ | |
711 | LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); | |
06ace26e | 712 | LASSERT(tx->tx_lnetmsg); |
d7e09d03 | 713 | |
f351bad2 | 714 | tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr; |
d7e09d03 PT |
715 | tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t); |
716 | ||
d3d3d37a JS |
717 | tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); |
718 | tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); | |
d7e09d03 PT |
719 | } |
720 | ||
721 | static void | |
ff13fd40 | 722 | ksocknal_pack_msg_v2(struct ksock_tx *tx) |
d7e09d03 | 723 | { |
f351bad2 | 724 | tx->tx_iov[0].iov_base = &tx->tx_msg; |
d7e09d03 | 725 | |
06ace26e | 726 | if (tx->tx_lnetmsg) { |
d7e09d03 PT |
727 | LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); |
728 | ||
729 | tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; | |
730 | tx->tx_iov[0].iov_len = sizeof(ksock_msg_t); | |
d3d3d37a JS |
731 | tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len; |
732 | tx->tx_resid = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len; | |
d7e09d03 PT |
733 | } else { |
734 | LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); | |
735 | ||
736 | tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); | |
d3d3d37a JS |
737 | tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); |
738 | tx->tx_resid = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); | |
d7e09d03 PT |
739 | } |
740 | /* Don't checksum before start sending, because packet can be piggybacked with ACK */ | |
741 | } | |
742 | ||
743 | static void | |
744 | ksocknal_unpack_msg_v1(ksock_msg_t *msg) | |
745 | { | |
97d10d0a MS |
746 | msg->ksm_csum = 0; |
747 | msg->ksm_type = KSOCK_MSG_LNET; | |
d3d3d37a JS |
748 | msg->ksm_zc_cookies[0] = 0; |
749 | msg->ksm_zc_cookies[1] = 0; | |
d7e09d03 PT |
750 | } |
751 | ||
752 | static void | |
753 | ksocknal_unpack_msg_v2(ksock_msg_t *msg) | |
754 | { | |
755 | return; /* Do nothing */ | |
756 | } | |
757 | ||
ff13fd40 | 758 | struct ksock_proto ksocknal_protocol_v1x = { |
97d10d0a MS |
759 | .pro_version = KSOCK_PROTO_V1, |
760 | .pro_send_hello = ksocknal_send_hello_v1, | |
761 | .pro_recv_hello = ksocknal_recv_hello_v1, | |
762 | .pro_pack = ksocknal_pack_msg_v1, | |
763 | .pro_unpack = ksocknal_unpack_msg_v1, | |
764 | .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1, | |
765 | .pro_handle_zcreq = NULL, | |
766 | .pro_handle_zcack = NULL, | |
767 | .pro_queue_tx_zcack = NULL, | |
768 | .pro_match_tx = ksocknal_match_tx | |
d7e09d03 PT |
769 | }; |
770 | ||
ff13fd40 | 771 | struct ksock_proto ksocknal_protocol_v2x = { |
97d10d0a MS |
772 | .pro_version = KSOCK_PROTO_V2, |
773 | .pro_send_hello = ksocknal_send_hello_v2, | |
774 | .pro_recv_hello = ksocknal_recv_hello_v2, | |
775 | .pro_pack = ksocknal_pack_msg_v2, | |
776 | .pro_unpack = ksocknal_unpack_msg_v2, | |
777 | .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, | |
778 | .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2, | |
779 | .pro_handle_zcreq = ksocknal_handle_zcreq, | |
780 | .pro_handle_zcack = ksocknal_handle_zcack, | |
781 | .pro_match_tx = ksocknal_match_tx | |
d7e09d03 PT |
782 | }; |
783 | ||
ff13fd40 | 784 | struct ksock_proto ksocknal_protocol_v3x = { |
97d10d0a MS |
785 | .pro_version = KSOCK_PROTO_V3, |
786 | .pro_send_hello = ksocknal_send_hello_v2, | |
787 | .pro_recv_hello = ksocknal_recv_hello_v2, | |
788 | .pro_pack = ksocknal_pack_msg_v2, | |
789 | .pro_unpack = ksocknal_unpack_msg_v2, | |
790 | .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2, | |
791 | .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3, | |
792 | .pro_handle_zcreq = ksocknal_handle_zcreq, | |
793 | .pro_handle_zcack = ksocknal_handle_zcack, | |
794 | .pro_match_tx = ksocknal_match_tx_v3 | |
d7e09d03 | 795 | }; |