Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / drivers / staging / lustre / lnet / klnds / socklnd / socklnd_proto.c
CommitLineData
d7e09d03
PT
1/*
2 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
3 *
4 * Copyright (c) 2012, Intel Corporation.
5 *
6 * Author: Zach Brown <zab@zabbo.net>
7 * Author: Peter J. Braam <braam@clusterfs.com>
8 * Author: Phil Schwan <phil@clusterfs.com>
9 * Author: Eric Barton <eric@bartonsoftware.com>
10 *
11 * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
12 *
13 * Portals is free software; you can redistribute it and/or
14 * modify it under the terms of version 2 of the GNU General Public
15 * License as published by the Free Software Foundation.
16 *
17 * Portals is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
d7e09d03
PT
22 */
23
24#include "socklnd.h"
25
/*
 * Protocol entries:
 *   pro_send_hello       : send hello message
 *   pro_recv_hello       : receive hello message
 *   pro_pack             : pack message header
 *   pro_unpack           : unpack message header
 *   pro_queue_tx_zcack() : called holding BH lock kss_lock;
 *                          returns 1 if the ACK was piggybacked, otherwise 0
 *   pro_queue_tx_msg()   : called holding BH lock kss_lock;
 *                          returns the ACK tx that was piggybacked onto (and
 *                          replaced by) the message, or NULL
 *   pro_handle_zcreq()   : handler of incoming ZC-REQ
 *   pro_handle_zcack()   : handler of incoming ZC-ACK
 *   pro_match_tx()       : called holding glock
 */
40
ff13fd40
JS
41static struct ksock_tx *
42ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg)
d7e09d03
PT
43{
44 /* V1.x, just enqueue it */
45 list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
46 return NULL;
47}
48
49void
ff13fd40 50ksocknal_next_tx_carrier(struct ksock_conn *conn)
d7e09d03 51{
ff13fd40 52 struct ksock_tx *tx = conn->ksnc_tx_carrier;
d7e09d03
PT
53
54 /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
d3422d5e 55 LASSERT(!list_empty(&conn->ksnc_tx_queue));
06ace26e 56 LASSERT(tx);
d7e09d03
PT
57
58 /* Next TX that can carry ZC-ACK or LNet message */
59 if (tx->tx_list.next == &conn->ksnc_tx_queue) {
60 /* no more packets queued */
61 conn->ksnc_tx_carrier = NULL;
62 } else {
8f5b1435 63 conn->ksnc_tx_carrier = list_next_entry(tx, tx_list);
d3422d5e 64 LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
d7e09d03
PT
65 }
66}
67
68static int
ff13fd40
JS
69ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn,
70 struct ksock_tx *tx_ack, __u64 cookie)
d7e09d03 71{
ff13fd40 72 struct ksock_tx *tx = conn->ksnc_tx_carrier;
d7e09d03 73
06ace26e 74 LASSERT(!tx_ack ||
c314c319 75 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
d7e09d03
PT
76
77 /*
78 * Enqueue or piggyback tx_ack / cookie
79 * . no tx can piggyback cookie of tx_ack (or cookie), just
80 * enqueue the tx_ack (if tx_ack != NUL) and return NULL.
81 * . There is tx can piggyback cookie of tx_ack (or cookie),
82 * piggyback the cookie and return the tx.
83 */
06ace26e
JS
84 if (!tx) {
85 if (tx_ack) {
d7e09d03 86 list_add_tail(&tx_ack->tx_list,
c314c319 87 &conn->ksnc_tx_queue);
d7e09d03
PT
88 conn->ksnc_tx_carrier = tx_ack;
89 }
90 return 0;
91 }
92
93 if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
94 /* tx is noop zc-ack, can't piggyback zc-ack cookie */
06ace26e 95 if (tx_ack)
d7e09d03 96 list_add_tail(&tx_ack->tx_list,
c314c319 97 &conn->ksnc_tx_queue);
d7e09d03
PT
98 return 0;
99 }
100
101 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
5fd88337 102 LASSERT(!tx->tx_msg.ksm_zc_cookies[1]);
d7e09d03 103
06ace26e 104 if (tx_ack)
d7e09d03
PT
105 cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
106
107 /* piggyback the zc-ack cookie */
108 tx->tx_msg.ksm_zc_cookies[1] = cookie;
109 /* move on to the next TX which can carry cookie */
110 ksocknal_next_tx_carrier(conn);
111
112 return 1;
113}
114
ff13fd40
JS
115static struct ksock_tx *
116ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg)
d7e09d03 117{
ff13fd40 118 struct ksock_tx *tx = conn->ksnc_tx_carrier;
d7e09d03
PT
119
120 /*
121 * Enqueue tx_msg:
122 * . If there is no NOOP on the connection, just enqueue
123 * tx_msg and return NULL
124 * . If there is NOOP on the connection, piggyback the cookie
125 * and replace the NOOP tx, and return the NOOP tx.
126 */
06ace26e 127 if (!tx) { /* nothing on queue */
d7e09d03
PT
128 list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
129 conn->ksnc_tx_carrier = tx_msg;
130 return NULL;
131 }
132
133 if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
134 list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
135 return NULL;
136 }
137
d3422d5e 138 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
d7e09d03
PT
139
140 /* There is a noop zc-ack can be piggybacked */
141 tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
142 ksocknal_next_tx_carrier(conn);
143
144 /* use new_tx to replace the noop zc-ack packet */
145 list_add(&tx_msg->tx_list, &tx->tx_list);
146 list_del(&tx->tx_list);
147
148 return tx;
149}
150
151static int
ff13fd40
JS
152ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
153 struct ksock_tx *tx_ack, __u64 cookie)
d7e09d03 154{
ff13fd40 155 struct ksock_tx *tx;
d7e09d03
PT
156
157 if (conn->ksnc_type != SOCKLND_CONN_ACK)
158 return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
159
160 /* non-blocking ZC-ACK (to router) */
06ace26e 161 LASSERT(!tx_ack ||
c314c319 162 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
d7e09d03 163
4a87df3e 164 tx = conn->ksnc_tx_carrier;
06ace26e
JS
165 if (!tx) {
166 if (tx_ack) {
d7e09d03 167 list_add_tail(&tx_ack->tx_list,
c314c319 168 &conn->ksnc_tx_queue);
d7e09d03
PT
169 conn->ksnc_tx_carrier = tx_ack;
170 }
171 return 0;
172 }
173
06ace26e 174 /* conn->ksnc_tx_carrier */
d7e09d03 175
06ace26e 176 if (tx_ack)
d7e09d03
PT
177 cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
178
179 if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
180 return 1;
181
182 if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
183 /* replace the keepalive PING with a real ACK */
5fd88337 184 LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
d7e09d03
PT
185 tx->tx_msg.ksm_zc_cookies[1] = cookie;
186 return 1;
187 }
188
189 if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
190 cookie == tx->tx_msg.ksm_zc_cookies[1]) {
b0f5aad5 191 CWARN("%s: duplicated ZC cookie: %llu\n",
d7e09d03
PT
192 libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
193 return 1; /* XXX return error in the future */
194 }
195
5fd88337 196 if (!tx->tx_msg.ksm_zc_cookies[0]) {
d7e09d03
PT
197 /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
198 if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
199 tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
200 tx->tx_msg.ksm_zc_cookies[1] = cookie;
201 } else {
202 tx->tx_msg.ksm_zc_cookies[0] = cookie;
203 }
204
205 if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
206 /* not likely to carry more ACKs, skip it to simplify logic */
207 ksocknal_next_tx_carrier(conn);
208 }
209
210 return 1;
211 }
212
213 /* takes two or more cookies already */
214
215 if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
216 __u64 tmp = 0;
217
2b284326 218 /* two separated cookies: (a+2, a) or (a+1, a) */
d3422d5e 219 LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
d7e09d03
PT
220 tx->tx_msg.ksm_zc_cookies[1] <= 2);
221
222 if (tx->tx_msg.ksm_zc_cookies[0] -
223 tx->tx_msg.ksm_zc_cookies[1] == 2) {
224 if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
225 tmp = cookie;
226 } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
227 tmp = tx->tx_msg.ksm_zc_cookies[1];
228 } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
229 tmp = tx->tx_msg.ksm_zc_cookies[0];
230 }
231
5fd88337 232 if (tmp) {
d7e09d03
PT
233 /* range of cookies */
234 tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
235 tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
236 return 1;
237 }
238
239 } else {
240 /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
241 if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
242 cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
b0f5aad5 243 CWARN("%s: duplicated ZC cookie: %llu\n",
d7e09d03
PT
244 libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
245 return 1; /* XXX: return error in the future */
246 }
247
248 if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
249 tx->tx_msg.ksm_zc_cookies[1] = cookie;
250 return 1;
251 }
252
253 if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
254 tx->tx_msg.ksm_zc_cookies[0] = cookie;
255 return 1;
256 }
257 }
258
259 /* failed to piggyback ZC-ACK */
06ace26e 260 if (tx_ack) {
d7e09d03
PT
261 list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
262 /* the next tx can piggyback at least 1 ACK */
263 ksocknal_next_tx_carrier(conn);
264 }
265
266 return 0;
267}
268
269static int
ff13fd40 270ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
d7e09d03
PT
271{
272 int nob;
273
274#if SOCKNAL_VERSION_DEBUG
275 if (!*ksocknal_tunables.ksnd_typed_conns)
276 return SOCKNAL_MATCH_YES;
277#endif
278
06ace26e 279 if (!tx || !tx->tx_lnetmsg) {
d7e09d03
PT
280 /* noop packet */
281 nob = offsetof(ksock_msg_t, ksm_u);
282 } else {
283 nob = tx->tx_lnetmsg->msg_len +
284 ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
285 sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
286 }
287
288 /* default checking for typed connection */
289 switch (conn->ksnc_type) {
290 default:
291 CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
292 LBUG();
293 case SOCKLND_CONN_ANY:
294 return SOCKNAL_MATCH_YES;
295
296 case SOCKLND_CONN_BULK_IN:
297 return SOCKNAL_MATCH_MAY;
298
299 case SOCKLND_CONN_BULK_OUT:
300 if (nob < *ksocknal_tunables.ksnd_min_bulk)
301 return SOCKNAL_MATCH_MAY;
302 else
303 return SOCKNAL_MATCH_YES;
304
305 case SOCKLND_CONN_CONTROL:
306 if (nob >= *ksocknal_tunables.ksnd_min_bulk)
307 return SOCKNAL_MATCH_MAY;
308 else
309 return SOCKNAL_MATCH_YES;
310 }
311}
312
313static int
ff13fd40 314ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
d7e09d03
PT
315{
316 int nob;
317
06ace26e 318 if (!tx || !tx->tx_lnetmsg)
d7e09d03
PT
319 nob = offsetof(ksock_msg_t, ksm_u);
320 else
321 nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
322
323 switch (conn->ksnc_type) {
324 default:
325 CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
326 LBUG();
327 case SOCKLND_CONN_ANY:
328 return SOCKNAL_MATCH_NO;
329
330 case SOCKLND_CONN_ACK:
331 if (nonblk)
332 return SOCKNAL_MATCH_YES;
06ace26e 333 else if (!tx || !tx->tx_lnetmsg)
d7e09d03
PT
334 return SOCKNAL_MATCH_MAY;
335 else
336 return SOCKNAL_MATCH_NO;
337
338 case SOCKLND_CONN_BULK_OUT:
339 if (nonblk)
340 return SOCKNAL_MATCH_NO;
341 else if (nob < *ksocknal_tunables.ksnd_min_bulk)
342 return SOCKNAL_MATCH_MAY;
343 else
344 return SOCKNAL_MATCH_YES;
345
346 case SOCKLND_CONN_CONTROL:
347 if (nonblk)
348 return SOCKNAL_MATCH_NO;
349 else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
350 return SOCKNAL_MATCH_MAY;
351 else
352 return SOCKNAL_MATCH_YES;
353 }
354}
355
356/* (Sink) handle incoming ZC request from sender */
357static int
ff13fd40 358ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote)
d7e09d03 359{
ff13fd40
JS
360 struct ksock_peer *peer = c->ksnc_peer;
361 struct ksock_conn *conn;
362 struct ksock_tx *tx;
97d10d0a 363 int rc;
d7e09d03
PT
364
365 read_lock(&ksocknal_data.ksnd_global_lock);
366
367 conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
06ace26e 368 if (conn) {
ff13fd40 369 struct ksock_sched *sched = conn->ksnc_scheduler;
d7e09d03 370
06ace26e 371 LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);
d7e09d03
PT
372
373 spin_lock_bh(&sched->kss_lock);
374
375 rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
376
377 spin_unlock_bh(&sched->kss_lock);
378
379 if (rc) { /* piggybacked */
380 read_unlock(&ksocknal_data.ksnd_global_lock);
381 return 0;
382 }
383 }
384
385 read_unlock(&ksocknal_data.ksnd_global_lock);
386
387 /* ACK connection is not ready, or can't piggyback the ACK */
388 tx = ksocknal_alloc_tx_noop(cookie, !!remote);
06ace26e 389 if (!tx)
d7e09d03
PT
390 return -ENOMEM;
391
4a87df3e 392 rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
5fd88337 393 if (!rc)
d7e09d03
PT
394 return 0;
395
396 ksocknal_free_tx(tx);
397 return rc;
398}
399
400/* (Sender) handle ZC_ACK from sink */
401static int
ff13fd40 402ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
d7e09d03 403{
ff13fd40
JS
404 struct ksock_peer *peer = conn->ksnc_peer;
405 struct ksock_tx *tx;
406 struct ksock_tx *temp;
407 struct ksock_tx *tmp;
d3422d5e 408 LIST_HEAD(zlist);
97d10d0a 409 int count;
d7e09d03 410
5fd88337 411 if (!cookie1)
d7e09d03
PT
412 cookie1 = cookie2;
413
414 count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
415
416 if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
417 conn->ksnc_proto == &ksocknal_protocol_v3x) {
418 /* keepalive PING for V3.x, just ignore it */
419 return count == 1 ? 0 : -EPROTO;
420 }
421
422 spin_lock(&peer->ksnp_lock);
423
c314c319
JS
424 list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list,
425 tx_zc_list) {
d7e09d03
PT
426 __u64 c = tx->tx_msg.ksm_zc_cookies[0];
427
428 if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
429 tx->tx_msg.ksm_zc_cookies[0] = 0;
430 list_del(&tx->tx_zc_list);
431 list_add(&tx->tx_zc_list, &zlist);
432
5fd88337 433 if (!--count)
d7e09d03
PT
434 break;
435 }
436 }
437
438 spin_unlock(&peer->ksnp_lock);
439
1edae04f 440 list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
d7e09d03
PT
441 list_del(&tx->tx_zc_list);
442 ksocknal_tx_decref(tx);
443 }
444
5fd88337 445 return !count ? 0 : -EPROTO;
d7e09d03
PT
446}
447
448static int
ff13fd40 449ksocknal_send_hello_v1(struct ksock_conn *conn, ksock_hello_msg_t *hello)
d7e09d03 450{
97d10d0a
MS
451 struct socket *sock = conn->ksnc_sock;
452 lnet_hdr_t *hdr;
d7e09d03 453 lnet_magicversion_t *hmv;
97d10d0a
MS
454 int rc;
455 int i;
d7e09d03
PT
456
457 CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));
458
459 LIBCFS_ALLOC(hdr, sizeof(*hdr));
06ace26e 460 if (!hdr) {
d7e09d03
PT
461 CERROR("Can't allocate lnet_hdr_t\n");
462 return -ENOMEM;
463 }
464
465 hmv = (lnet_magicversion_t *)&hdr->dest_nid;
466
4420cfd3
JS
467 /*
468 * Re-organize V2.x message header to V1.x (lnet_hdr_t)
469 * header and send out
470 */
b31e64c4
JS
471 hmv->magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC);
472 hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR);
473 hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR);
d7e09d03 474
5fd88337 475 if (the_lnet.ln_testprotocompat) {
d7e09d03
PT
476 /* single-shot proto check */
477 LNET_LOCK();
5fd88337 478 if (the_lnet.ln_testprotocompat & 1) {
d7e09d03
PT
479 hmv->version_major++; /* just different! */
480 the_lnet.ln_testprotocompat &= ~1;
481 }
5fd88337 482 if (the_lnet.ln_testprotocompat & 2) {
d7e09d03
PT
483 hmv->magic = LNET_PROTO_MAGIC;
484 the_lnet.ln_testprotocompat &= ~2;
485 }
486 LNET_UNLOCK();
487 }
488
b31e64c4
JS
489 hdr->src_nid = cpu_to_le64(hello->kshm_src_nid);
490 hdr->src_pid = cpu_to_le32(hello->kshm_src_pid);
491 hdr->type = cpu_to_le32(LNET_MSG_HELLO);
492 hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32));
493 hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype);
494 hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation);
d7e09d03 495
1ad6a73e 496 rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
5fd88337 497 if (rc) {
5e8f6920
PT
498 CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
499 rc, &conn->ksnc_ipaddr, conn->ksnc_port);
d7e09d03
PT
500 goto out;
501 }
502
5fd88337 503 if (!hello->kshm_nips)
d7e09d03
PT
504 goto out;
505
9797fb0e 506 for (i = 0; i < (int)hello->kshm_nips; i++)
b31e64c4 507 hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]);
d7e09d03 508
1ad6a73e
JS
509 rc = lnet_sock_write(sock, hello->kshm_ips,
510 hello->kshm_nips * sizeof(__u32),
511 lnet_acceptor_timeout());
5fd88337 512 if (rc) {
2d00bd17
JP
513 CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
514 rc, hello->kshm_nips,
5e8f6920 515 &conn->ksnc_ipaddr, conn->ksnc_port);
d7e09d03
PT
516 }
517out:
518 LIBCFS_FREE(hdr, sizeof(*hdr));
519
520 return rc;
521}
522
523static int
ff13fd40 524ksocknal_send_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello)
d7e09d03 525{
e327dc88 526 struct socket *sock = conn->ksnc_sock;
97d10d0a 527 int rc;
d7e09d03
PT
528
529 hello->kshm_magic = LNET_PROTO_MAGIC;
530 hello->kshm_version = conn->ksnc_proto->pro_version;
531
5fd88337 532 if (the_lnet.ln_testprotocompat) {
d7e09d03
PT
533 /* single-shot proto check */
534 LNET_LOCK();
5fd88337 535 if (the_lnet.ln_testprotocompat & 1) {
d7e09d03
PT
536 hello->kshm_version++; /* just different! */
537 the_lnet.ln_testprotocompat &= ~1;
538 }
539 LNET_UNLOCK();
540 }
541
1ad6a73e
JS
542 rc = lnet_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
543 lnet_acceptor_timeout());
5fd88337 544 if (rc) {
5e8f6920
PT
545 CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
546 rc, &conn->ksnc_ipaddr, conn->ksnc_port);
d7e09d03
PT
547 return rc;
548 }
549
5fd88337 550 if (!hello->kshm_nips)
d7e09d03
PT
551 return 0;
552
1ad6a73e
JS
553 rc = lnet_sock_write(sock, hello->kshm_ips,
554 hello->kshm_nips * sizeof(__u32),
555 lnet_acceptor_timeout());
5fd88337 556 if (rc) {
2d00bd17
JP
557 CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
558 rc, hello->kshm_nips,
5e8f6920 559 &conn->ksnc_ipaddr, conn->ksnc_port);
d7e09d03
PT
560 }
561
562 return rc;
563}
564
565static int
ff13fd40 566ksocknal_recv_hello_v1(struct ksock_conn *conn, ksock_hello_msg_t *hello,
1d8cb70c 567 int timeout)
d7e09d03 568{
97d10d0a
MS
569 struct socket *sock = conn->ksnc_sock;
570 lnet_hdr_t *hdr;
571 int rc;
572 int i;
d7e09d03
PT
573
574 LIBCFS_ALLOC(hdr, sizeof(*hdr));
06ace26e 575 if (!hdr) {
d7e09d03
PT
576 CERROR("Can't allocate lnet_hdr_t\n");
577 return -ENOMEM;
578 }
579
1ad6a73e
JS
580 rc = lnet_sock_read(sock, &hdr->src_nid,
581 sizeof(*hdr) - offsetof(lnet_hdr_t, src_nid),
582 timeout);
5fd88337 583 if (rc) {
5e8f6920 584 CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
c314c319 585 rc, &conn->ksnc_ipaddr);
d3422d5e 586 LASSERT(rc < 0 && rc != -EALREADY);
d7e09d03
PT
587 goto out;
588 }
589
590 /* ...and check we got what we expected */
b31e64c4 591 if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) {
2d00bd17
JP
592 CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
593 le32_to_cpu(hdr->type),
594 &conn->ksnc_ipaddr);
d7e09d03
PT
595 rc = -EPROTO;
596 goto out;
597 }
598
97d10d0a
MS
599 hello->kshm_src_nid = le64_to_cpu(hdr->src_nid);
600 hello->kshm_src_pid = le32_to_cpu(hdr->src_pid);
d3422d5e 601 hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
97d10d0a
MS
602 hello->kshm_ctype = le32_to_cpu(hdr->msg.hello.type);
603 hello->kshm_nips = le32_to_cpu(hdr->payload_length) /
604 sizeof(__u32);
d7e09d03
PT
605
606 if (hello->kshm_nips > LNET_MAX_INTERFACES) {
5e8f6920
PT
607 CERROR("Bad nips %d from ip %pI4h\n",
608 hello->kshm_nips, &conn->ksnc_ipaddr);
d7e09d03
PT
609 rc = -EPROTO;
610 goto out;
611 }
612
5fd88337 613 if (!hello->kshm_nips)
d7e09d03
PT
614 goto out;
615
1ad6a73e
JS
616 rc = lnet_sock_read(sock, hello->kshm_ips,
617 hello->kshm_nips * sizeof(__u32), timeout);
5fd88337 618 if (rc) {
5e8f6920 619 CERROR("Error %d reading IPs from ip %pI4h\n",
c314c319 620 rc, &conn->ksnc_ipaddr);
5e8f6920 621 LASSERT(rc < 0 && rc != -EALREADY);
d7e09d03
PT
622 goto out;
623 }
624
9797fb0e 625 for (i = 0; i < (int)hello->kshm_nips; i++) {
d7e09d03
PT
626 hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
627
5fd88337 628 if (!hello->kshm_ips[i]) {
5e8f6920
PT
629 CERROR("Zero IP[%d] from ip %pI4h\n",
630 i, &conn->ksnc_ipaddr);
d7e09d03
PT
631 rc = -EPROTO;
632 break;
633 }
634 }
635out:
636 LIBCFS_FREE(hdr, sizeof(*hdr));
637
638 return rc;
639}
640
641static int
ff13fd40 642ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello, int timeout)
d7e09d03 643{
97d10d0a
MS
644 struct socket *sock = conn->ksnc_sock;
645 int rc;
646 int i;
d7e09d03
PT
647
648 if (hello->kshm_magic == LNET_PROTO_MAGIC)
649 conn->ksnc_flip = 0;
650 else
651 conn->ksnc_flip = 1;
652
1ad6a73e
JS
653 rc = lnet_sock_read(sock, &hello->kshm_src_nid,
654 offsetof(ksock_hello_msg_t, kshm_ips) -
655 offsetof(ksock_hello_msg_t, kshm_src_nid),
656 timeout);
5fd88337 657 if (rc) {
5e8f6920 658 CERROR("Error %d reading HELLO from %pI4h\n",
c314c319 659 rc, &conn->ksnc_ipaddr);
5e8f6920 660 LASSERT(rc < 0 && rc != -EALREADY);
d7e09d03
PT
661 return rc;
662 }
663
664 if (conn->ksnc_flip) {
665 __swab32s(&hello->kshm_src_pid);
666 __swab64s(&hello->kshm_src_nid);
667 __swab32s(&hello->kshm_dst_pid);
668 __swab64s(&hello->kshm_dst_nid);
669 __swab64s(&hello->kshm_src_incarnation);
670 __swab64s(&hello->kshm_dst_incarnation);
671 __swab32s(&hello->kshm_ctype);
672 __swab32s(&hello->kshm_nips);
673 }
674
675 if (hello->kshm_nips > LNET_MAX_INTERFACES) {
5e8f6920
PT
676 CERROR("Bad nips %d from ip %pI4h\n",
677 hello->kshm_nips, &conn->ksnc_ipaddr);
d7e09d03
PT
678 return -EPROTO;
679 }
680
5fd88337 681 if (!hello->kshm_nips)
d7e09d03
PT
682 return 0;
683
1ad6a73e
JS
684 rc = lnet_sock_read(sock, hello->kshm_ips,
685 hello->kshm_nips * sizeof(__u32), timeout);
5fd88337 686 if (rc) {
5e8f6920 687 CERROR("Error %d reading IPs from ip %pI4h\n",
c314c319 688 rc, &conn->ksnc_ipaddr);
5e8f6920 689 LASSERT(rc < 0 && rc != -EALREADY);
d7e09d03
PT
690 return rc;
691 }
692
9797fb0e 693 for (i = 0; i < (int)hello->kshm_nips; i++) {
d7e09d03
PT
694 if (conn->ksnc_flip)
695 __swab32s(&hello->kshm_ips[i]);
696
5fd88337 697 if (!hello->kshm_ips[i]) {
5e8f6920
PT
698 CERROR("Zero IP[%d] from ip %pI4h\n",
699 i, &conn->ksnc_ipaddr);
d7e09d03
PT
700 return -EPROTO;
701 }
702 }
703
704 return 0;
705}
706
707static void
ff13fd40 708ksocknal_pack_msg_v1(struct ksock_tx *tx)
d7e09d03
PT
709{
710 /* V1.x has no KSOCK_MSG_NOOP */
711 LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
06ace26e 712 LASSERT(tx->tx_lnetmsg);
d7e09d03 713
f351bad2 714 tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
d7e09d03
PT
715 tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t);
716
d3d3d37a
JS
717 tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
718 tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
d7e09d03
PT
719}
720
721static void
ff13fd40 722ksocknal_pack_msg_v2(struct ksock_tx *tx)
d7e09d03 723{
f351bad2 724 tx->tx_iov[0].iov_base = &tx->tx_msg;
d7e09d03 725
06ace26e 726 if (tx->tx_lnetmsg) {
d7e09d03
PT
727 LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
728
729 tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
730 tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
d3d3d37a
JS
731 tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
732 tx->tx_resid = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
d7e09d03
PT
733 } else {
734 LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
735
736 tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
d3d3d37a
JS
737 tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
738 tx->tx_resid = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
d7e09d03
PT
739 }
740 /* Don't checksum before start sending, because packet can be piggybacked with ACK */
741}
742
743static void
744ksocknal_unpack_msg_v1(ksock_msg_t *msg)
745{
97d10d0a
MS
746 msg->ksm_csum = 0;
747 msg->ksm_type = KSOCK_MSG_LNET;
d3d3d37a
JS
748 msg->ksm_zc_cookies[0] = 0;
749 msg->ksm_zc_cookies[1] = 0;
d7e09d03
PT
750}
751
752static void
753ksocknal_unpack_msg_v2(ksock_msg_t *msg)
754{
755 return; /* Do nothing */
756}
757
ff13fd40 758struct ksock_proto ksocknal_protocol_v1x = {
97d10d0a
MS
759 .pro_version = KSOCK_PROTO_V1,
760 .pro_send_hello = ksocknal_send_hello_v1,
761 .pro_recv_hello = ksocknal_recv_hello_v1,
762 .pro_pack = ksocknal_pack_msg_v1,
763 .pro_unpack = ksocknal_unpack_msg_v1,
764 .pro_queue_tx_msg = ksocknal_queue_tx_msg_v1,
765 .pro_handle_zcreq = NULL,
766 .pro_handle_zcack = NULL,
767 .pro_queue_tx_zcack = NULL,
768 .pro_match_tx = ksocknal_match_tx
d7e09d03
PT
769};
770
ff13fd40 771struct ksock_proto ksocknal_protocol_v2x = {
97d10d0a
MS
772 .pro_version = KSOCK_PROTO_V2,
773 .pro_send_hello = ksocknal_send_hello_v2,
774 .pro_recv_hello = ksocknal_recv_hello_v2,
775 .pro_pack = ksocknal_pack_msg_v2,
776 .pro_unpack = ksocknal_unpack_msg_v2,
777 .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
778 .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
779 .pro_handle_zcreq = ksocknal_handle_zcreq,
780 .pro_handle_zcack = ksocknal_handle_zcack,
781 .pro_match_tx = ksocknal_match_tx
d7e09d03
PT
782};
783
ff13fd40 784struct ksock_proto ksocknal_protocol_v3x = {
97d10d0a
MS
785 .pro_version = KSOCK_PROTO_V3,
786 .pro_send_hello = ksocknal_send_hello_v2,
787 .pro_recv_hello = ksocknal_recv_hello_v2,
788 .pro_pack = ksocknal_pack_msg_v2,
789 .pro_unpack = ksocknal_unpack_msg_v2,
790 .pro_queue_tx_msg = ksocknal_queue_tx_msg_v2,
791 .pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
792 .pro_handle_zcreq = ksocknal_handle_zcreq,
793 .pro_handle_zcack = ksocknal_handle_zcack,
794 .pro_match_tx = ksocknal_match_tx_v3
d7e09d03 795};