Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lnet/lnet/lib-md.c | |
37 | * | |
38 | * Memory Descriptor management routines | |
39 | */ | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_LNET | |
42 | ||
9fdaf8c0 | 43 | #include "../../include/linux/lnet/lib-lnet.h" |
d7e09d03 PT |
44 | |
45 | /* must be called with lnet_res_lock held */ | |
46 | void | |
47 | lnet_md_unlink(lnet_libmd_t *md) | |
48 | { | |
5fd88337 | 49 | if (!(md->md_flags & LNET_MD_FLAG_ZOMBIE)) { |
d7e09d03 PT |
50 | /* first unlink attempt... */ |
51 | lnet_me_t *me = md->md_me; | |
52 | ||
53 | md->md_flags |= LNET_MD_FLAG_ZOMBIE; | |
54 | ||
4420cfd3 JS |
55 | /* |
56 | * Disassociate from ME (if any), | |
242c7b52 | 57 | * and unlink it if it was created |
4420cfd3 JS |
58 | * with LNET_UNLINK |
59 | */ | |
06ace26e | 60 | if (me) { |
d7e09d03 PT |
61 | /* detach MD from portal */ |
62 | lnet_ptl_detach_md(me, md); | |
63 | if (me->me_unlink == LNET_UNLINK) | |
64 | lnet_me_unlink(me); | |
65 | } | |
66 | ||
67 | /* ensure all future handle lookups fail */ | |
68 | lnet_res_lh_invalidate(&md->md_lh); | |
69 | } | |
70 | ||
5fd88337 | 71 | if (md->md_refcount) { |
d7e09d03 PT |
72 | CDEBUG(D_NET, "Queueing unlink of md %p\n", md); |
73 | return; | |
74 | } | |
75 | ||
76 | CDEBUG(D_NET, "Unlinking md %p\n", md); | |
77 | ||
06ace26e | 78 | if (md->md_eq) { |
7e7ab095 | 79 | int cpt = lnet_cpt_of_cookie(md->md_lh.lh_cookie); |
d7e09d03 PT |
80 | |
81 | LASSERT(*md->md_eq->eq_refs[cpt] > 0); | |
82 | (*md->md_eq->eq_refs[cpt])--; | |
83 | } | |
84 | ||
85 | LASSERT(!list_empty(&md->md_list)); | |
86 | list_del_init(&md->md_list); | |
d9c90615 | 87 | lnet_md_free(md); |
d7e09d03 PT |
88 | } |
89 | ||
90 | static int | |
91 | lnet_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) | |
92 | { | |
7e7ab095 | 93 | int i; |
d7e09d03 | 94 | unsigned int niov; |
7e7ab095 | 95 | int total_length = 0; |
d7e09d03 PT |
96 | |
97 | lmd->md_me = NULL; | |
98 | lmd->md_start = umd->start; | |
99 | lmd->md_offset = 0; | |
100 | lmd->md_max_size = umd->max_size; | |
101 | lmd->md_options = umd->options; | |
102 | lmd->md_user_ptr = umd->user_ptr; | |
103 | lmd->md_eq = NULL; | |
104 | lmd->md_threshold = umd->threshold; | |
105 | lmd->md_refcount = 0; | |
106 | lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0; | |
107 | ||
5fd88337 JS |
108 | if (umd->options & LNET_MD_IOVEC) { |
109 | if (umd->options & LNET_MD_KIOV) /* Can't specify both */ | |
d7e09d03 PT |
110 | return -EINVAL; |
111 | ||
d3d3d37a JS |
112 | niov = umd->length; |
113 | lmd->md_niov = umd->length; | |
d7e09d03 | 114 | memcpy(lmd->md_iov.iov, umd->start, |
fc8b040d | 115 | niov * sizeof(lmd->md_iov.iov[0])); |
d7e09d03 PT |
116 | |
117 | for (i = 0; i < (int)niov; i++) { | |
118 | /* We take the base address on trust */ | |
242c7b52 JL |
119 | /* invalid length */ |
120 | if (lmd->md_iov.iov[i].iov_len <= 0) | |
d7e09d03 PT |
121 | return -EINVAL; |
122 | ||
123 | total_length += lmd->md_iov.iov[i].iov_len; | |
124 | } | |
125 | ||
126 | lmd->md_length = total_length; | |
127 | ||
5fd88337 | 128 | if ((umd->options & LNET_MD_MAX_SIZE) && /* use max size */ |
d7e09d03 | 129 | (umd->max_size < 0 || |
be82d9b2 | 130 | umd->max_size > total_length)) /* illegal max_size */ |
d7e09d03 PT |
131 | return -EINVAL; |
132 | ||
5fd88337 | 133 | } else if (umd->options & LNET_MD_KIOV) { |
d3d3d37a JS |
134 | niov = umd->length; |
135 | lmd->md_niov = umd->length; | |
d7e09d03 | 136 | memcpy(lmd->md_iov.kiov, umd->start, |
fc8b040d | 137 | niov * sizeof(lmd->md_iov.kiov[0])); |
d7e09d03 PT |
138 | |
139 | for (i = 0; i < (int)niov; i++) { | |
140 | /* We take the page pointer on trust */ | |
141 | if (lmd->md_iov.kiov[i].kiov_offset + | |
09cbfeaf | 142 | lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE) |
d7e09d03 PT |
143 | return -EINVAL; /* invalid length */ |
144 | ||
145 | total_length += lmd->md_iov.kiov[i].kiov_len; | |
146 | } | |
147 | ||
148 | lmd->md_length = total_length; | |
149 | ||
5fd88337 | 150 | if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ |
d7e09d03 | 151 | (umd->max_size < 0 || |
be82d9b2 | 152 | umd->max_size > total_length)) /* illegal max_size */ |
d7e09d03 PT |
153 | return -EINVAL; |
154 | } else { /* contiguous */ | |
155 | lmd->md_length = umd->length; | |
d3d3d37a JS |
156 | niov = 1; |
157 | lmd->md_niov = 1; | |
d7e09d03 PT |
158 | lmd->md_iov.iov[0].iov_base = umd->start; |
159 | lmd->md_iov.iov[0].iov_len = umd->length; | |
160 | ||
5fd88337 | 161 | if ((umd->options & LNET_MD_MAX_SIZE) && /* max size used */ |
d7e09d03 | 162 | (umd->max_size < 0 || |
be82d9b2 | 163 | umd->max_size > (int)umd->length)) /* illegal max_size */ |
d7e09d03 PT |
164 | return -EINVAL; |
165 | } | |
166 | ||
167 | return 0; | |
168 | } | |
169 | ||
170 | /* must be called with resource lock held */ | |
171 | static int | |
172 | lnet_md_link(lnet_libmd_t *md, lnet_handle_eq_t eq_handle, int cpt) | |
173 | { | |
174 | struct lnet_res_container *container = the_lnet.ln_md_containers[cpt]; | |
175 | ||
4420cfd3 JS |
176 | /* |
177 | * NB we are passed an allocated, but inactive md. | |
d7e09d03 PT |
178 | * if we return success, caller may lnet_md_unlink() it. |
179 | * otherwise caller may only lnet_md_free() it. | |
180 | */ | |
4420cfd3 JS |
181 | /* |
182 | * This implementation doesn't know how to create START events or | |
d7e09d03 | 183 | * disable END events. Best to LASSERT our caller is compliant so |
4420cfd3 JS |
184 | * we find out quickly... |
185 | */ | |
186 | /* | |
187 | * TODO - reevaluate what should be here in light of | |
d7e09d03 PT |
188 | * the removal of the start and end events |
189 | * maybe there we shouldn't even allow LNET_EQ_NONE!) | |
06ace26e | 190 | * LASSERT(!eq); |
d7e09d03 PT |
191 | */ |
192 | if (!LNetHandleIsInvalid(eq_handle)) { | |
193 | md->md_eq = lnet_handle2eq(&eq_handle); | |
194 | ||
1f01063f | 195 | if (!md->md_eq) |
d7e09d03 PT |
196 | return -ENOENT; |
197 | ||
198 | (*md->md_eq->eq_refs[cpt])++; | |
199 | } | |
200 | ||
201 | lnet_res_lh_initialize(container, &md->md_lh); | |
202 | ||
203 | LASSERT(list_empty(&md->md_list)); | |
204 | list_add(&md->md_list, &container->rec_active); | |
205 | ||
206 | return 0; | |
207 | } | |
208 | ||
209 | /* must be called with lnet_res_lock held */ | |
210 | void | |
211 | lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd) | |
212 | { | |
213 | /* NB this doesn't copy out all the iov entries so when a | |
214 | * discontiguous MD is copied out, the target gets to know the | |
215 | * original iov pointer (in start) and the number of entries it had | |
216 | * and that's all. | |
217 | */ | |
218 | umd->start = lmd->md_start; | |
5fd88337 JS |
219 | umd->length = !(lmd->md_options & |
220 | (LNET_MD_IOVEC | LNET_MD_KIOV)) ? | |
d7e09d03 PT |
221 | lmd->md_length : lmd->md_niov; |
222 | umd->threshold = lmd->md_threshold; | |
223 | umd->max_size = lmd->md_max_size; | |
224 | umd->options = lmd->md_options; | |
225 | umd->user_ptr = lmd->md_user_ptr; | |
226 | lnet_eq2handle(&umd->eq_handle, lmd->md_eq); | |
227 | } | |
228 | ||
f526b20a | 229 | static int |
d7e09d03 PT |
230 | lnet_md_validate(lnet_md_t *umd) |
231 | { | |
5fd88337 | 232 | if (!umd->start && umd->length) { |
d7e09d03 PT |
233 | CERROR("MD start pointer can not be NULL with length %u\n", |
234 | umd->length); | |
235 | return -EINVAL; | |
236 | } | |
237 | ||
5fd88337 | 238 | if ((umd->options & (LNET_MD_KIOV | LNET_MD_IOVEC)) && |
d7e09d03 PT |
239 | umd->length > LNET_MAX_IOV) { |
240 | CERROR("Invalid option: too many fragments %u, %d max\n", | |
241 | umd->length, LNET_MAX_IOV); | |
242 | return -EINVAL; | |
243 | } | |
244 | ||
245 | return 0; | |
246 | } | |
247 | ||
248 | /** | |
249 | * Create a memory descriptor and attach it to a ME | |
250 | * | |
251 | * \param meh A handle for a ME to associate the new MD with. | |
252 | * \param umd Provides initial values for the user-visible parts of a MD. | |
253 | * Other than its use for initialization, there is no linkage between this | |
254 | * structure and the MD maintained by the LNet. | |
255 | * \param unlink A flag to indicate whether the MD is automatically unlinked | |
256 | * when it becomes inactive, either because the operation threshold drops to | |
257 | * zero or because the available memory becomes less than \a umd.max_size. | |
258 | * (Note that the check for unlinking a MD only occurs after the completion | |
259 | * of a successful operation on the MD.) The value LNET_UNLINK enables auto | |
260 | * unlinking; the value LNET_RETAIN disables it. | |
261 | * \param handle On successful returns, a handle to the newly created MD is | |
262 | * saved here. This handle can be used later in LNetMDUnlink(). | |
263 | * | |
264 | * \retval 0 On success. | |
265 | * \retval -EINVAL If \a umd is not valid. | |
266 | * \retval -ENOMEM If new MD cannot be allocated. | |
267 | * \retval -ENOENT Either \a meh or \a umd.eq_handle does not point to a | |
268 | * valid object. Note that it's OK to supply a NULL \a umd.eq_handle by | |
269 | * calling LNetInvalidateHandle() on it. | |
270 | * \retval -EBUSY If the ME pointed to by \a meh is already associated with | |
271 | * a MD. | |
272 | */ | |
273 | int | |
274 | LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, | |
275 | lnet_unlink_t unlink, lnet_handle_md_t *handle) | |
276 | { | |
fc8b040d JL |
277 | LIST_HEAD(matches); |
278 | LIST_HEAD(drops); | |
7e7ab095 MS |
279 | struct lnet_me *me; |
280 | struct lnet_libmd *md; | |
281 | int cpt; | |
282 | int rc; | |
d7e09d03 | 283 | |
fc8b040d | 284 | LASSERT(the_lnet.ln_refcount > 0); |
d7e09d03 | 285 | |
5fd88337 | 286 | if (lnet_md_validate(&umd)) |
d7e09d03 PT |
287 | return -EINVAL; |
288 | ||
5fd88337 | 289 | if (!(umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { |
d7e09d03 PT |
290 | CERROR("Invalid option: no MD_OP set\n"); |
291 | return -EINVAL; | |
292 | } | |
293 | ||
294 | md = lnet_md_alloc(&umd); | |
1f01063f | 295 | if (!md) |
d7e09d03 PT |
296 | return -ENOMEM; |
297 | ||
298 | rc = lnet_md_build(md, &umd, unlink); | |
299 | cpt = lnet_cpt_of_cookie(meh.cookie); | |
300 | ||
301 | lnet_res_lock(cpt); | |
5fd88337 | 302 | if (rc) |
d7e09d03 PT |
303 | goto failed; |
304 | ||
305 | me = lnet_handle2me(&meh); | |
1f01063f | 306 | if (!me) |
d7e09d03 | 307 | rc = -ENOENT; |
06ace26e | 308 | else if (me->me_md) |
d7e09d03 PT |
309 | rc = -EBUSY; |
310 | else | |
311 | rc = lnet_md_link(md, umd.eq_handle, cpt); | |
312 | ||
5fd88337 | 313 | if (rc) |
d7e09d03 PT |
314 | goto failed; |
315 | ||
4420cfd3 JS |
316 | /* |
317 | * attach this MD to portal of ME and check if it matches any | |
318 | * blocked msgs on this portal | |
319 | */ | |
d7e09d03 PT |
320 | lnet_ptl_attach_md(me, md, &matches, &drops); |
321 | ||
322 | lnet_md2handle(handle, md); | |
323 | ||
324 | lnet_res_unlock(cpt); | |
325 | ||
326 | lnet_drop_delayed_msg_list(&drops, "Bad match"); | |
327 | lnet_recv_delayed_msg_list(&matches); | |
328 | ||
329 | return 0; | |
330 | ||
331 | failed: | |
d9c90615 | 332 | lnet_md_free(md); |
d7e09d03 PT |
333 | |
334 | lnet_res_unlock(cpt); | |
335 | return rc; | |
336 | } | |
337 | EXPORT_SYMBOL(LNetMDAttach); | |
338 | ||
339 | /** | |
340 | * Create a "free floating" memory descriptor - a MD that is not associated | |
341 | * with a ME. Such MDs are usually used in LNetPut() and LNetGet() operations. | |
342 | * | |
343 | * \param umd,unlink See the discussion for LNetMDAttach(). | |
344 | * \param handle On successful returns, a handle to the newly created MD is | |
345 | * saved here. This handle can be used later in LNetMDUnlink(), LNetPut(), | |
346 | * and LNetGet() operations. | |
347 | * | |
348 | * \retval 0 On success. | |
349 | * \retval -EINVAL If \a umd is not valid. | |
350 | * \retval -ENOMEM If new MD cannot be allocated. | |
351 | * \retval -ENOENT \a umd.eq_handle does not point to a valid EQ. Note that | |
352 | * it's OK to supply a NULL \a umd.eq_handle by calling | |
353 | * LNetInvalidateHandle() on it. | |
354 | */ | |
355 | int | |
356 | LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) | |
357 | { | |
7e7ab095 MS |
358 | lnet_libmd_t *md; |
359 | int cpt; | |
360 | int rc; | |
d7e09d03 | 361 | |
fc8b040d | 362 | LASSERT(the_lnet.ln_refcount > 0); |
d7e09d03 | 363 | |
5fd88337 | 364 | if (lnet_md_validate(&umd)) |
d7e09d03 PT |
365 | return -EINVAL; |
366 | ||
5fd88337 | 367 | if ((umd.options & (LNET_MD_OP_GET | LNET_MD_OP_PUT))) { |
d7e09d03 PT |
368 | CERROR("Invalid option: GET|PUT illegal on active MDs\n"); |
369 | return -EINVAL; | |
370 | } | |
371 | ||
372 | md = lnet_md_alloc(&umd); | |
1f01063f | 373 | if (!md) |
d7e09d03 PT |
374 | return -ENOMEM; |
375 | ||
376 | rc = lnet_md_build(md, &umd, unlink); | |
377 | ||
378 | cpt = lnet_res_lock_current(); | |
5fd88337 | 379 | if (rc) |
d7e09d03 PT |
380 | goto failed; |
381 | ||
382 | rc = lnet_md_link(md, umd.eq_handle, cpt); | |
5fd88337 | 383 | if (rc) |
d7e09d03 PT |
384 | goto failed; |
385 | ||
386 | lnet_md2handle(handle, md); | |
387 | ||
388 | lnet_res_unlock(cpt); | |
389 | return 0; | |
390 | ||
391 | failed: | |
d9c90615 | 392 | lnet_md_free(md); |
d7e09d03 PT |
393 | |
394 | lnet_res_unlock(cpt); | |
395 | return rc; | |
396 | } | |
397 | EXPORT_SYMBOL(LNetMDBind); | |
398 | ||
399 | /** | |
400 | * Unlink the memory descriptor from any ME it may be linked to and release | |
dee2857e IH |
401 | * the internal resources associated with it. As a result, active messages |
402 | * associated with the MD may get aborted. | |
d7e09d03 PT |
403 | * |
404 | * This function does not free the memory region associated with the MD; | |
405 | * i.e., the memory the user allocated for this MD. If the ME associated with | |
406 | * this MD is not NULL and was created with auto unlink enabled, the ME is | |
407 | * unlinked as well (see LNetMEAttach()). | |
408 | * | |
409 | * Explicitly unlinking a MD via this function call has the same behavior as | |
410 | * a MD that has been automatically unlinked, except that no LNET_EVENT_UNLINK | |
411 | * is generated in the latter case. | |
412 | * | |
413 | * An unlinked event can be reported in two ways: | |
414 | * - If there's no pending operations on the MD, it's unlinked immediately | |
415 | * and an LNET_EVENT_UNLINK event is logged before this function returns. | |
416 | * - Otherwise, the MD is only marked for deletion when this function | |
417 | * returns, and the unlinked event will be piggybacked on the event of | |
418 | * the completion of the last operation by setting the unlinked field of | |
419 | * the event. No dedicated LNET_EVENT_UNLINK event is generated. | |
420 | * | |
421 | * Note that in both cases the unlinked field of the event is always set; no | |
422 | * more event will happen on the MD after such an event is logged. | |
423 | * | |
424 | * \param mdh A handle for the MD to be unlinked. | |
425 | * | |
426 | * \retval 0 On success. | |
427 | * \retval -ENOENT If \a mdh does not point to a valid MD object. | |
428 | */ | |
429 | int | |
fc8b040d | 430 | LNetMDUnlink(lnet_handle_md_t mdh) |
d7e09d03 | 431 | { |
7e7ab095 MS |
432 | lnet_event_t ev; |
433 | lnet_libmd_t *md; | |
434 | int cpt; | |
d7e09d03 | 435 | |
d7e09d03 PT |
436 | LASSERT(the_lnet.ln_refcount > 0); |
437 | ||
438 | cpt = lnet_cpt_of_cookie(mdh.cookie); | |
439 | lnet_res_lock(cpt); | |
440 | ||
441 | md = lnet_handle2md(&mdh); | |
1f01063f | 442 | if (!md) { |
d7e09d03 PT |
443 | lnet_res_unlock(cpt); |
444 | return -ENOENT; | |
445 | } | |
446 | ||
dee2857e | 447 | md->md_flags |= LNET_MD_FLAG_ABORTED; |
4420cfd3 JS |
448 | /* |
449 | * If the MD is busy, lnet_md_unlink just marks it for deletion, and | |
dee2857e | 450 | * when the LND is done, the completion event flags that the MD was |
4420cfd3 JS |
451 | * unlinked. Otherwise, we enqueue an event now... |
452 | */ | |
5fd88337 | 453 | if (md->md_eq && !md->md_refcount) { |
d7e09d03 PT |
454 | lnet_build_unlink_event(md, &ev); |
455 | lnet_eq_enqueue_event(md->md_eq, &ev); | |
456 | } | |
457 | ||
458 | lnet_md_unlink(md); | |
459 | ||
460 | lnet_res_unlock(cpt); | |
461 | return 0; | |
462 | } | |
463 | EXPORT_SYMBOL(LNetMDUnlink); |