Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
4f3ca893 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 PT |
19 | * |
20 | * GPL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
4f3ca893 | 26 | * Copyright (c) 2012 - 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
4f3ca893 | 30 | * Lustre is a trademark of Seagate, Inc. |
d7e09d03 PT |
31 | * |
32 | * lnet/include/lnet/lib-types.h | |
d7e09d03 PT |
33 | */ |
34 | ||
35 | #ifndef __LNET_LIB_TYPES_H__ | |
36 | #define __LNET_LIB_TYPES_H__ | |
37 | ||
db18b8e9 JS |
38 | #include <linux/kthread.h> |
39 | #include <linux/uio.h> | |
40 | #include <linux/types.h> | |
41 | #include <net/sock.h> | |
d7e09d03 | 42 | |
db18b8e9 | 43 | #include "types.h" |
d7e09d03 | 44 | |
db18b8e9 JS |
45 | /* Max payload size */ |
46 | #define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD | |
47 | #if (LNET_MAX_PAYLOAD < LNET_MTU) | |
48 | # error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" | |
49 | #elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) | |
188acc61 | 50 | # error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" |
db18b8e9 | 51 | #endif |
d7e09d03 PT |
52 | |
53 | /* forward refs */ | |
54 | struct lnet_libmd; | |
55 | ||
56 | typedef struct lnet_msg { | |
188acc61 JS |
57 | struct list_head msg_activelist; |
58 | struct list_head msg_list; /* Q for credits/MD */ | |
d7e09d03 | 59 | |
188acc61 | 60 | lnet_process_id_t msg_target; |
d7e09d03 PT |
61 | /* where is it from, it's only for building event */ |
62 | lnet_nid_t msg_from; | |
63 | __u32 msg_type; | |
64 | ||
253d50eb | 65 | /* committed for sending */ |
d7e09d03 PT |
66 | unsigned int msg_tx_committed:1; |
67 | /* CPT # this message committed for sending */ | |
68 | unsigned int msg_tx_cpt:15; | |
253d50eb | 69 | /* committed for receiving */ |
d7e09d03 PT |
70 | unsigned int msg_rx_committed:1; |
71 | /* CPT # this message committed for receiving */ | |
72 | unsigned int msg_rx_cpt:15; | |
73 | /* queued for tx credit */ | |
74 | unsigned int msg_tx_delayed:1; | |
75 | /* queued for RX buffer */ | |
76 | unsigned int msg_rx_delayed:1; | |
77 | /* ready for pending on RX delay list */ | |
78 | unsigned int msg_rx_ready_delay:1; | |
79 | ||
188acc61 JS |
80 | unsigned int msg_vmflush:1; /* VM trying to free memory */ |
81 | unsigned int msg_target_is_router:1; /* sending to a router */ | |
82 | unsigned int msg_routing:1; /* being forwarded */ | |
83 | unsigned int msg_ack:1; /* ack on finalize (PUT) */ | |
84 | unsigned int msg_sending:1; /* outgoing message */ | |
85 | unsigned int msg_receiving:1; /* being received */ | |
86 | unsigned int msg_txcredit:1; /* taken an NI send credit */ | |
87 | unsigned int msg_peertxcredit:1; /* taken a peer send credit */ | |
88 | unsigned int msg_rtrcredit:1; /* taken a global | |
89 | router credit */ | |
90 | unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ | |
91 | unsigned int msg_onactivelist:1; /* on the activelist */ | |
92 | ||
93 | struct lnet_peer *msg_txpeer; /* peer I'm sending to */ | |
94 | struct lnet_peer *msg_rxpeer; /* peer I received from */ | |
95 | ||
96 | void *msg_private; | |
97 | struct lnet_libmd *msg_md; | |
98 | ||
99 | unsigned int msg_len; | |
100 | unsigned int msg_wanted; | |
101 | unsigned int msg_offset; | |
102 | unsigned int msg_niov; | |
103 | struct kvec *msg_iov; | |
104 | lnet_kiov_t *msg_kiov; | |
105 | ||
106 | lnet_event_t msg_ev; | |
107 | lnet_hdr_t msg_hdr; | |
d7e09d03 PT |
108 | } lnet_msg_t; |
109 | ||
d7e09d03 | 110 | typedef struct lnet_libhandle { |
188acc61 JS |
111 | struct list_head lh_hash_chain; |
112 | __u64 lh_cookie; | |
d7e09d03 PT |
113 | } lnet_libhandle_t; |
114 | ||
/*
 * lh_entry - map a pointer to an embedded member back to its container.
 * @ptr:    address of the embedded member
 * @type:   type of the enclosing object
 * @member: name of the member inside @type
 *
 * Evaluates to a 'type *' obtained by stepping back from @ptr by the byte
 * offset of @member.  offsetof() is used instead of taking the address of
 * a member through a null pointer (undefined behaviour), and the result
 * stays a pointer throughout rather than going through a pointer-minus-
 * pointer integer that is cast back to a pointer.
 */
#define lh_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
117 | ||
118 | typedef struct lnet_eq { | |
188acc61 JS |
119 | struct list_head eq_list; |
120 | lnet_libhandle_t eq_lh; | |
121 | lnet_seq_t eq_enq_seq; | |
122 | lnet_seq_t eq_deq_seq; | |
123 | unsigned int eq_size; | |
124 | lnet_eq_handler_t eq_callback; | |
125 | lnet_event_t *eq_events; | |
d7e09d03 PT |
126 | int **eq_refs; /* percpt refcount for EQ */ |
127 | } lnet_eq_t; | |
128 | ||
129 | typedef struct lnet_me { | |
188acc61 JS |
130 | struct list_head me_list; |
131 | lnet_libhandle_t me_lh; | |
132 | lnet_process_id_t me_match_id; | |
133 | unsigned int me_portal; | |
134 | unsigned int me_pos; /* hash offset in mt_hash */ | |
135 | __u64 me_match_bits; | |
136 | __u64 me_ignore_bits; | |
137 | lnet_unlink_t me_unlink; | |
138 | struct lnet_libmd *me_md; | |
d7e09d03 PT |
139 | } lnet_me_t; |
140 | ||
141 | typedef struct lnet_libmd { | |
188acc61 JS |
142 | struct list_head md_list; |
143 | lnet_libhandle_t md_lh; | |
144 | lnet_me_t *md_me; | |
145 | char *md_start; | |
146 | unsigned int md_offset; | |
147 | unsigned int md_length; | |
148 | unsigned int md_max_size; | |
149 | int md_threshold; | |
150 | int md_refcount; | |
151 | unsigned int md_options; | |
152 | unsigned int md_flags; | |
153 | void *md_user_ptr; | |
154 | lnet_eq_t *md_eq; | |
155 | unsigned int md_niov; /* # frags */ | |
d7e09d03 | 156 | union { |
188acc61 JS |
157 | struct kvec iov[LNET_MAX_IOV]; |
158 | lnet_kiov_t kiov[LNET_MAX_IOV]; | |
d7e09d03 PT |
159 | } md_iov; |
160 | } lnet_libmd_t; | |
161 | ||
188acc61 JS |
162 | #define LNET_MD_FLAG_ZOMBIE (1 << 0) |
163 | #define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) | |
164 | #define LNET_MD_FLAG_ABORTED (1 << 2) | |
d7e09d03 | 165 | |
d7e09d03 PT |
166 | typedef struct { |
167 | /* info about peers we are trying to fail */ | |
188acc61 JS |
168 | struct list_head tp_list; /* ln_test_peers */ |
169 | lnet_nid_t tp_nid; /* matching nid */ | |
170 | unsigned int tp_threshold; /* # failures to simulate */ | |
d7e09d03 PT |
171 | } lnet_test_peer_t; |
172 | ||
188acc61 JS |
173 | #define LNET_COOKIE_TYPE_MD 1 |
174 | #define LNET_COOKIE_TYPE_ME 2 | |
175 | #define LNET_COOKIE_TYPE_EQ 3 | |
176 | #define LNET_COOKIE_TYPE_BITS 2 | |
d7e09d03 PT |
177 | #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) |
178 | ||
188acc61 | 179 | struct lnet_ni; /* forward ref */ |
d7e09d03 | 180 | |
3b77f472 | 181 | typedef struct lnet_lnd { |
d7e09d03 | 182 | /* fields managed by portals */ |
188acc61 JS |
183 | struct list_head lnd_list; /* stash in the LND table */ |
184 | int lnd_refcount; /* # active instances */ | |
d7e09d03 PT |
185 | |
186 | /* fields initialised by the LND */ | |
db18b8e9 | 187 | __u32 lnd_type; |
d7e09d03 | 188 | |
b11866b3 AO |
189 | int (*lnd_startup)(struct lnet_ni *ni); |
190 | void (*lnd_shutdown)(struct lnet_ni *ni); | |
d7e09d03 PT |
191 | int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); |
192 | ||
193 | /* In data movement APIs below, payload buffers are described as a set | |
194 | * of 'niov' fragments which are... | |
195 | * EITHER | |
196 | * in virtual memory (struct kvec *iov != NULL) | |
197 | * OR | |
198 | * in pages (kernel only: lnet_kiov_t *kiov != NULL). | |
199 | * The LND may NOT overwrite these fragment descriptors. | |
200 | * An 'offset' may specify a byte offset within the set of | |
201 | * fragments to start from. | |
202 | */ | |
203 | ||
204 | /* Start sending a preformatted message. 'private' is NULL for PUT and | |
205 | * GET messages; otherwise this is a response to an incoming message | |
206 | * and 'private' is the 'private' passed to lnet_parse(). Return | |
207 | * non-zero for immediate failure, otherwise complete later with | |
208 | * lnet_finalize() */ | |
209 | int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); | |
210 | ||
211 | /* Start receiving 'mlen' bytes of payload data, skipping the following | |
212 | * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to | |
d766b4b5 | 213 | * lnet_parse(). Return non-zero for immediate failure, otherwise |
d7e09d03 PT |
214 | * complete later with lnet_finalize(). This also gives back a receive |
215 | * credit if the LND does flow control. */ | |
216 | int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, | |
217 | int delayed, unsigned int niov, | |
f351bad2 | 218 | struct kvec *iov, lnet_kiov_t *kiov, |
188acc61 JS |
219 | unsigned int offset, unsigned int mlen, |
220 | unsigned int rlen); | |
d7e09d03 PT |
221 | |
222 | /* lnet_parse() has had to delay processing of this message | |
223 | * (e.g. waiting for a forwarding buffer or send credits). Give the | |
224 | * LND a chance to free urgently needed resources. If called, return 0 | |
225 | * for success and do NOT give back a receive credit; that has to wait | |
226 | * until lnd_recv() gets called. On failure return < 0 and | |
227 | * release resources; lnd_recv() will not be called. */ | |
188acc61 JS |
228 | int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, |
229 | lnet_msg_t *msg, void **new_privatep); | |
d7e09d03 PT |
230 | |
231 | /* notification of peer health */ | |
232 | void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); | |
233 | ||
234 | /* query of peer aliveness */ | |
188acc61 JS |
235 | void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, |
236 | unsigned long *when); | |
d7e09d03 PT |
237 | |
238 | /* accept a new connection */ | |
e327dc88 | 239 | int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); |
d7e09d03 PT |
240 | } lnd_t; |
241 | ||
d7e09d03 PT |
242 | struct lnet_tx_queue { |
243 | int tq_credits; /* # tx credits free */ | |
244 | int tq_credits_min; /* lowest it's been */ | |
245 | int tq_credits_max; /* total # tx credits */ | |
188acc61 | 246 | struct list_head tq_delayed; /* delayed TXs */ |
d7e09d03 PT |
247 | }; |
248 | ||
d7e09d03 | 249 | typedef struct lnet_ni { |
188acc61 JS |
250 | spinlock_t ni_lock; |
251 | struct list_head ni_list; /* chain on ln_nis */ | |
252 | struct list_head ni_cptlist; /* chain on ln_nis_cpt */ | |
253 | int ni_maxtxcredits; /* # tx credits */ | |
d7e09d03 | 254 | /* # per-peer send credits */ |
188acc61 | 255 | int ni_peertxcredits; |
d7e09d03 | 256 | /* # per-peer router buffer credits */ |
188acc61 | 257 | int ni_peerrtrcredits; |
d7e09d03 | 258 | /* seconds to consider peer dead */ |
188acc61 JS |
259 | int ni_peertimeout; |
260 | int ni_ncpts; /* number of CPTs */ | |
261 | __u32 *ni_cpts; /* bond NI on some CPTs */ | |
262 | lnet_nid_t ni_nid; /* interface's NID */ | |
263 | void *ni_data; /* instance-specific data */ | |
264 | lnd_t *ni_lnd; /* procedural interface */ | |
d7e09d03 PT |
265 | struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ |
266 | int **ni_refs; /* percpt reference count */ | |
188acc61 JS |
267 | long ni_last_alive;/* when I was last alive */ |
268 | lnet_ni_status_t *ni_status; /* my health status */ | |
d7e09d03 | 269 | /* equivalent interfaces to use */ |
188acc61 | 270 | char *ni_interfaces[LNET_MAX_INTERFACES]; |
d7e09d03 PT |
271 | } lnet_ni_t; |
272 | ||
273 | #define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL | |
274 | ||
275 | /* NB: the values of these features are equal to LNET_PROTO_PING_VERSION_x | |
276 | * of old LNet, so there shouldn't be any compatibility issues */ | |
277 | #define LNET_PING_FEAT_INVAL (0) /* no feature */ | |
278 | #define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ | |
279 | #define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ | |
280 | ||
281 | #define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ | |
282 | LNET_PING_FEAT_NI_STATUS) | |
283 | ||
d7e09d03 PT |
284 | /* router checker data, per router */ |
285 | #define LNET_MAX_RTR_NIS 16 | |
286 | #define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) | |
287 | typedef struct { | |
288 | /* chain on the_lnet.ln_zombie_rcd or ln_deathrow_rcd */ | |
188acc61 JS |
289 | struct list_head rcd_list; |
290 | lnet_handle_md_t rcd_mdh; /* ping buffer MD */ | |
d7e09d03 PT |
291 | struct lnet_peer *rcd_gateway; /* reference to gateway */ |
292 | lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ | |
293 | } lnet_rc_data_t; | |
294 | ||
295 | typedef struct lnet_peer { | |
188acc61 JS |
296 | struct list_head lp_hashlist; /* chain on peer hash */ |
297 | struct list_head lp_txq; /* messages blocking for | |
298 | tx credits */ | |
299 | struct list_head lp_rtrq; /* messages blocking for | |
300 | router credits */ | |
301 | struct list_head lp_rtr_list; /* chain on router list */ | |
302 | int lp_txcredits; /* # tx credits available */ | |
303 | int lp_mintxcredits; /* low water mark */ | |
304 | int lp_rtrcredits; /* # router credits */ | |
305 | int lp_minrtrcredits; /* low water mark */ | |
306 | unsigned int lp_alive:1; /* alive/dead? */ | |
307 | unsigned int lp_notify:1; /* notification outstanding? */ | |
308 | unsigned int lp_notifylnd:1;/* outstanding notification | |
309 | for LND? */ | |
310 | unsigned int lp_notifying:1; /* some thread is handling | |
311 | notification */ | |
312 | unsigned int lp_ping_notsent;/* SEND event outstanding | |
313 | from ping */ | |
314 | int lp_alive_count; /* # times router went | |
315 | dead<->alive */ | |
316 | long lp_txqnob; /* bytes queued for sending */ | |
317 | unsigned long lp_timestamp; /* time of last aliveness | |
318 | news */ | |
319 | unsigned long lp_ping_timestamp;/* time of last ping | |
320 | attempt */ | |
321 | unsigned long lp_ping_deadline; /* != 0 if ping reply | |
322 | expected */ | |
323 | unsigned long lp_last_alive; /* when I was last alive */ | |
324 | unsigned long lp_last_query; /* when lp_ni was queried | |
325 | last time */ | |
326 | lnet_ni_t *lp_ni; /* interface peer is on */ | |
327 | lnet_nid_t lp_nid; /* peer's NID */ | |
328 | int lp_refcount; /* # refs */ | |
329 | int lp_cpt; /* CPT this peer attached on */ | |
d7e09d03 | 330 | /* # refs from lnet_route_t::lr_gateway */ |
188acc61 | 331 | int lp_rtr_refcount; |
d7e09d03 | 332 | /* returned RC ping features */ |
188acc61 JS |
333 | unsigned int lp_ping_feats; |
334 | struct list_head lp_routes; /* routers on this peer */ | |
d7e09d03 PT |
335 | lnet_rc_data_t *lp_rcd; /* router checker state */ |
336 | } lnet_peer_t; | |
337 | ||
d7e09d03 | 338 | /* peer hash size */ |
188acc61 JS |
339 | #define LNET_PEER_HASH_BITS 9 |
340 | #define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) | |
d7e09d03 PT |
341 | |
342 | /* peer hash table */ | |
343 | struct lnet_peer_table { | |
188acc61 JS |
344 | int pt_version; /* /proc validity stamp */ |
345 | int pt_number; /* # peers extant */ | |
346 | struct list_head pt_deathrow; /* zombie peers */ | |
347 | struct list_head *pt_hash; /* NID->peer hash */ | |
d7e09d03 PT |
348 | }; |
349 | ||
350 | /* peer aliveness is enabled only on routers for peers in a network where the | |
351 | * lnet_ni_t::ni_peertimeout has been set to a positive value */ | |
352 | #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing != 0 && \ | |
353 | (lp)->lp_ni->ni_peertimeout > 0) | |
354 | ||
355 | typedef struct { | |
188acc61 JS |
356 | struct list_head lr_list; /* chain on net */ |
357 | struct list_head lr_gwlist; /* chain on gateway */ | |
d7e09d03 | 358 | lnet_peer_t *lr_gateway; /* router node */ |
188acc61 JS |
359 | __u32 lr_net; /* remote network number */ |
360 | int lr_seq; /* sequence for round-robin */ | |
361 | unsigned int lr_downis; /* number of down NIs */ | |
362 | unsigned int lr_hops; /* how far I am */ | |
363 | unsigned int lr_priority; /* route priority */ | |
d7e09d03 PT |
364 | } lnet_route_t; |
365 | ||
366 | #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) | |
367 | #define LNET_REMOTE_NETS_HASH_MAX (1U << 16) | |
368 | #define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) | |
369 | ||
370 | typedef struct { | |
188acc61 JS |
371 | struct list_head lrn_list; /* chain on |
372 | ln_remote_nets_hash */ | |
373 | struct list_head lrn_routes; /* routes to me */ | |
374 | __u32 lrn_net; /* my net number */ | |
d7e09d03 PT |
375 | } lnet_remotenet_t; |
376 | ||
db18b8e9 JS |
377 | /** lnet message has credit and can be submitted to lnd for send/receive */ |
378 | #define LNET_CREDIT_OK 0 | |
379 | /** lnet message is waiting for credit */ | |
380 | #define LNET_CREDIT_WAIT 1 | |
381 | ||
d7e09d03 | 382 | typedef struct { |
188acc61 JS |
383 | struct list_head rbp_bufs; /* my free buffer pool */ |
384 | struct list_head rbp_msgs; /* messages blocking | |
385 | for a buffer */ | |
386 | int rbp_npages; /* # pages in each buffer */ | |
387 | int rbp_nbuffers; /* # buffers */ | |
388 | int rbp_credits; /* # free buffers / | |
389 | blocked messages */ | |
390 | int rbp_mincredits; /* low water mark */ | |
d7e09d03 PT |
391 | } lnet_rtrbufpool_t; |
392 | ||
393 | typedef struct { | |
188acc61 JS |
394 | struct list_head rb_list; /* chain on rbp_bufs */ |
395 | lnet_rtrbufpool_t *rb_pool; /* owning pool */ | |
396 | lnet_kiov_t rb_kiov[0]; /* the buffer space */ | |
d7e09d03 PT |
397 | } lnet_rtrbuf_t; |
398 | ||
188acc61 | 399 | #define LNET_PEER_HASHSIZE 503 /* prime! */ |
d7e09d03 | 400 | |
db18b8e9 | 401 | #define LNET_NRBPOOLS 3 /* # different router buffer pools */ |
d7e09d03 PT |
402 | |
/*
 * LNET_MATCHMD_* result flags.  Every flag occupies its own bit, so the
 * values can be combined and tested as a mask.
 */
enum {
	LNET_MATCHMD_NONE	= 0x1,	/* nothing matched */
	LNET_MATCHMD_OK		= 0x2,	/* matched successfully */
	LNET_MATCHMD_DROP	= 0x4,	/* has to be discarded */
	LNET_MATCHMD_EXHAUSTED	= 0x8,	/* matched, and the buffer is exhausted */
	/* either matched or dropped */
	LNET_MATCHMD_FINISH	= LNET_MATCHMD_OK | LNET_MATCHMD_DROP,
};
415 | ||
416 | /* Options for lnet_portal_t::ptl_options */ | |
188acc61 JS |
417 | #define LNET_PTL_LAZY (1 << 0) |
418 | #define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ | |
419 | #define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, | |
420 | request portal */ | |
d7e09d03 PT |
421 | |
422 | /* parameter for matching operations (GET, PUT) */ | |
423 | struct lnet_match_info { | |
424 | __u64 mi_mbits; | |
425 | lnet_process_id_t mi_id; | |
426 | unsigned int mi_opc; | |
427 | unsigned int mi_portal; | |
428 | unsigned int mi_rlength; | |
429 | unsigned int mi_roffset; | |
430 | }; | |
431 | ||
/*
 * ME hash of an RDMA portal.
 *
 * lnet_match_table::mt_mhash is allocated with (LNET_MT_HASH_SIZE + 1)
 * entries; the final one, index LNET_MT_HASH_IGNORE, is reserved for MEs
 * that carry ignore-bits.
 *
 * The "exhausted" bitmap needs one bit per hash entry.  A __u64 supplies
 * 2^LNET_MT_BITS_U64 bits, so the regular entries take
 * 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) = 4 __u64 words, and one more
 * __u64 (of which a single bit is used) covers the ignore-bits list.
 */
#define LNET_MT_BITS_U64	6	/* a __u64 holds 2^6 bits */

#define LNET_MT_HASH_BITS	8
#define LNET_MT_HASH_SIZE	(1 << LNET_MT_HASH_BITS)
#define LNET_MT_HASH_MASK	(LNET_MT_HASH_SIZE - 1)
#define LNET_MT_HASH_IGNORE	LNET_MT_HASH_SIZE

#define LNET_MT_EXHAUSTED_BITS	(LNET_MT_HASH_BITS - LNET_MT_BITS_U64)
#define LNET_MT_EXHAUSTED_BMAP	((1 << LNET_MT_EXHAUSTED_BITS) + 1)
445 | ||
446 | /* portal match table */ | |
447 | struct lnet_match_table { | |
448 | /* reserved for upcoming patches, CPU partition ID */ | |
188acc61 JS |
449 | unsigned int mt_cpt; |
450 | unsigned int mt_portal; /* portal index */ | |
d7e09d03 | 451 | /* match table is set as "enabled" if there's a non-exhausted MD
d766b4b5 | 452 | * attached on mt_mhash; it's only valid for wildcard portals */ |
188acc61 | 453 | unsigned int mt_enabled; |
d7e09d03 | 454 | /* bitmap to flag whether MEs on mt_mhash are exhausted or not */ |
188acc61 JS |
455 | __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; |
456 | struct list_head *mt_mhash; /* matching hash */ | |
d7e09d03 PT |
457 | }; |
458 | ||
459 | /* these are only useful for wildcard portal */ | |
460 | /* Turn off message rotor for wildcard portals */ | |
461 | #define LNET_PTL_ROTOR_OFF 0 | |
462 | /* round-robin dispatch all PUT messages for wildcard portals */ | |
463 | #define LNET_PTL_ROTOR_ON 1 | |
464 | /* round-robin dispatch routed PUT message for wildcard portals */ | |
465 | #define LNET_PTL_ROTOR_RR_RT 2 | |
466 | /* dispatch routed PUT message by hashing source NID for wildcard portals */ | |
467 | #define LNET_PTL_ROTOR_HASH_RT 3 | |
468 | ||
469 | typedef struct lnet_portal { | |
188acc61 JS |
470 | spinlock_t ptl_lock; |
471 | unsigned int ptl_index; /* portal ID, reserved */ | |
d7e09d03 | 472 | /* flags on this portal: lazy, unique... */ |
188acc61 | 473 | unsigned int ptl_options; |
2b284326 | 474 | /* list of messages which are stealing buffer */ |
188acc61 | 475 | struct list_head ptl_msg_stealing; |
d7e09d03 | 476 | /* messages blocking for MD */ |
188acc61 | 477 | struct list_head ptl_msg_delayed; |
d7e09d03 PT |
478 | /* Match table for each CPT */ |
479 | struct lnet_match_table **ptl_mtables; | |
480 | /* spread rotor of incoming "PUT" */ | |
188acc61 | 481 | unsigned int ptl_rotor; |
d7e09d03 | 482 | /* # active entries for this portal */ |
188acc61 | 483 | int ptl_mt_nmaps; |
d7e09d03 | 484 | /* array of active entries' cpu-partition-id */ |
188acc61 | 485 | int ptl_mt_maps[0]; |
d7e09d03 PT |
486 | } lnet_portal_t; |
487 | ||
488 | #define LNET_LH_HASH_BITS 12 | |
489 | #define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) | |
490 | #define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) | |
491 | ||
492 | /* resource container (ME, MD, EQ) */ | |
493 | struct lnet_res_container { | |
188acc61 JS |
494 | unsigned int rec_type; /* container type */ |
495 | __u64 rec_lh_cookie; /* cookie generator */ | |
496 | struct list_head rec_active; /* active resource list */ | |
497 | struct list_head *rec_lh_hash; /* handle hash */ | |
d7e09d03 PT |
498 | }; |
499 | ||
500 | /* message container */ | |
501 | struct lnet_msg_container { | |
188acc61 | 502 | int msc_init; /* initialized or not */ |
d7e09d03 | 503 | /* max # threads finalizing */ |
188acc61 | 504 | int msc_nfinalizers; |
d7e09d03 | 505 | /* msgs waiting to complete finalizing */ |
188acc61 JS |
506 | struct list_head msc_finalizing; |
507 | struct list_head msc_active; /* active message list */ | |
d7e09d03 PT |
508 | /* threads doing finalization */ |
509 | void **msc_finalizers; | |
d7e09d03 PT |
510 | }; |
511 | ||
512 | /* Router Checker states */ | |
513 | #define LNET_RC_STATE_SHUTDOWN 0 /* not started */ | |
514 | #define LNET_RC_STATE_RUNNING 1 /* started up OK */ | |
515 | #define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ | |
516 | ||
3b77f472 | 517 | typedef struct { |
d7e09d03 | 518 | /* CPU partition table of LNet */ |
188acc61 | 519 | struct cfs_cpt_table *ln_cpt_table; |
d7e09d03 | 520 | /* number of CPTs in ln_cpt_table */ |
188acc61 JS |
521 | unsigned int ln_cpt_number; |
522 | unsigned int ln_cpt_bits; | |
d7e09d03 PT |
523 | |
524 | /* protect LNet resources (ME/MD/EQ) */ | |
188acc61 | 525 | struct cfs_percpt_lock *ln_res_lock; |
d7e09d03 | 526 | /* # portals */ |
188acc61 | 527 | int ln_nportals; |
d7e09d03 PT |
528 | /* the vector of portals */ |
529 | lnet_portal_t **ln_portals; | |
530 | /* percpt ME containers */ | |
531 | struct lnet_res_container **ln_me_containers; | |
532 | /* percpt MD container */ | |
533 | struct lnet_res_container **ln_md_containers; | |
534 | ||
535 | /* Event Queue container */ | |
188acc61 JS |
536 | struct lnet_res_container ln_eq_container; |
537 | wait_queue_head_t ln_eq_waitq; | |
538 | spinlock_t ln_eq_wait_lock; | |
539 | unsigned int ln_remote_nets_hbits; | |
d7e09d03 PT |
540 | |
541 | /* protect NI, peer table, credits, routers, rtrbuf... */ | |
188acc61 | 542 | struct cfs_percpt_lock *ln_net_lock; |
d7e09d03 PT |
543 | /* percpt message containers for active/finalizing/freed message */ |
544 | struct lnet_msg_container **ln_msg_containers; | |
545 | lnet_counters_t **ln_counters; | |
546 | struct lnet_peer_table **ln_peer_tables; | |
547 | /* failure simulation */ | |
188acc61 | 548 | struct list_head ln_test_peers; |
d7e09d03 | 549 | |
188acc61 | 550 | struct list_head ln_nis; /* LND instances */ |
d7e09d03 | 551 | /* NIs bond on specific CPT(s) */ |
188acc61 | 552 | struct list_head ln_nis_cpt; |
d7e09d03 | 553 | /* dying LND instances */ |
188acc61 JS |
554 | struct list_head ln_nis_zombie; |
555 | lnet_ni_t *ln_loni; /* the loopback NI */ | |
d7e09d03 | 556 | /* NI to wait for events in */ |
188acc61 | 557 | lnet_ni_t *ln_eq_waitni; |
d7e09d03 PT |
558 | |
559 | /* remote networks with routes to them */ | |
188acc61 | 560 | struct list_head *ln_remote_nets_hash; |
d7e09d03 | 561 | /* validity stamp */ |
188acc61 | 562 | __u64 ln_remote_nets_version; |
d7e09d03 | 563 | /* list of all known routers */ |
188acc61 | 564 | struct list_head ln_routers; |
d7e09d03 | 565 | /* validity stamp */ |
188acc61 | 566 | __u64 ln_routers_version; |
d7e09d03 PT |
567 | /* percpt router buffer pools */ |
568 | lnet_rtrbufpool_t **ln_rtrpools; | |
569 | ||
188acc61 JS |
570 | lnet_handle_md_t ln_ping_target_md; |
571 | lnet_handle_eq_t ln_ping_target_eq; | |
572 | lnet_ping_info_t *ln_ping_info; | |
d7e09d03 PT |
573 | |
574 | /* router checker startup/shutdown state */ | |
188acc61 | 575 | int ln_rc_state; |
d7e09d03 | 576 | /* router checker's event queue */ |
188acc61 | 577 | lnet_handle_eq_t ln_rc_eqh; |
d7e09d03 | 578 | /* rcd still pending on net */ |
188acc61 | 579 | struct list_head ln_rcd_deathrow; |
d7e09d03 | 580 | /* rcd ready for free */ |
188acc61 | 581 | struct list_head ln_rcd_zombie; |
d7e09d03 | 582 | /* serialise startup/shutdown */ |
188acc61 | 583 | struct semaphore ln_rc_signal; |
d7e09d03 | 584 | |
188acc61 JS |
585 | struct mutex ln_api_mutex; |
586 | struct mutex ln_lnd_mutex; | |
587 | int ln_init; /* lnet_init() | |
588 | called? */ | |
d7e09d03 | 589 | /* Have I called LNetNIInit myself? */ |
188acc61 | 590 | int ln_niinit_self; |
d7e09d03 | 591 | /* LNetNIInit/LNetNIFini counter */ |
188acc61 | 592 | int ln_refcount; |
d7e09d03 | 593 | /* shutdown in progress */ |
188acc61 | 594 | int ln_shutdown; |
d7e09d03 | 595 | |
188acc61 JS |
596 | int ln_routing; /* am I a router? */ |
597 | lnet_pid_t ln_pid; /* requested pid */ | |
d7e09d03 | 598 | /* uniquely identifies this ni in this epoch */ |
188acc61 | 599 | __u64 ln_interface_cookie; |
d7e09d03 | 600 | /* registered LNDs */ |
188acc61 | 601 | struct list_head ln_lnds; |
d7e09d03 PT |
602 | |
603 | /* space for network names */ | |
188acc61 JS |
604 | char *ln_network_tokens; |
605 | int ln_network_tokens_nob; | |
d7e09d03 | 606 | /* test protocol compatibility flags */ |
188acc61 | 607 | int ln_testprotocompat; |
d7e09d03 PT |
608 | |
609 | } lnet_t; | |
610 | ||
611 | #endif |