/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Implementation of cl_lock for OSC layer.
 *
 * Author: Nikita Danilov <nikita.danilov@sun.com>
 */

#define DEBUG_SUBSYSTEM S_OSC

#include "../../include/linux/libcfs/libcfs.h"
/* fid_build_reg_res_name() */
#include "../include/lustre_fid.h"

#include "osc_cl_internal.h"

/** \addtogroup osc
 * @{
 */

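/*
 * Bias value used by osc_dlm_lock_pageref() below to atomically test
 * ols_pageref for zero; large enough in absolute value that a biased
 * counter cannot be confused with a legitimate page reference count.
 */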
#define _PAGEREF_MAGIC (-10000000)

/*****************************************************************************
 *
 * Type conversions.
 *
 */

static const struct cl_lock_operations osc_lock_ops;
static const struct cl_lock_operations osc_lock_lockless_ops;
static void osc_lock_to_lockless(const struct lu_env *env,
				 struct osc_lock *ols, int force);
static int osc_lock_has_pages(struct osc_lock *olck);

int osc_lock_is_lockless(const struct osc_lock *olck)
{
	return (olck->ols_cl.cls_ops == &osc_lock_lockless_ops);
}

/**
 * Returns a weak pointer to the ldlm lock identified by a handle. Returned
 * pointer cannot be dereferenced, as lock is not protected from concurrent
 * reclaim. This function is a helper for osc_lock_invariant().
 */
static struct ldlm_lock *osc_handle_ptr(struct lustre_handle *handle)
{
	struct ldlm_lock *lock;

	lock = ldlm_handle2lock(handle);
	if (lock)
		LDLM_LOCK_PUT(lock);
	return lock;
}

/**
 * Invariant that has to be true all of the time.
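 *
 * The checks below use ergo(a, b), which reads "a implies b" and is false
 * only when "a" holds but "b" does not, i.e. ergo(a, b) == !a || b.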
 */
static int osc_lock_invariant(struct osc_lock *ols)
{
	struct ldlm_lock *lock = osc_handle_ptr(&ols->ols_handle);
	struct ldlm_lock *olock = ols->ols_lock;
	int handle_used = lustre_handle_is_used(&ols->ols_handle);

	if (ergo(osc_lock_is_lockless(ols),
		 ols->ols_locklessable && !ols->ols_lock))
		return 1;

	/*
	 * If all the following "ergo"s are true, return 1, otherwise 0.
	 */
	if (!ergo(olock, handle_used))
		return 0;

	if (!ergo(olock, olock->l_handle.h_cookie == ols->ols_handle.cookie))
		return 0;

	if (!ergo(handle_used,
		  ergo(lock && olock, lock == olock) &&
		  ergo(!lock, !olock)))
		return 0;
	/*
	 * Check that ->ols_handle and ->ols_lock are consistent, but
	 * take into account that they are set at different times.
	 */
	if (!ergo(ols->ols_state == OLS_CANCELLED,
		  !olock && !handle_used))
		return 0;
	/*
	 * DLM lock is destroyed only after we have seen cancellation
	 * ast.
	 */
	if (!ergo(olock && ols->ols_state < OLS_CANCELLED,
		  ((olock->l_flags & LDLM_FL_DESTROYED) == 0)))
		return 0;

	if (!ergo(ols->ols_state == OLS_GRANTED,
		  olock && olock->l_req_mode == olock->l_granted_mode &&
		  ols->ols_hold))
		return 0;
	return 1;
}

/*****************************************************************************
 *
 * Lock operations.
 *
 */

/**
 * Breaks a link between osc_lock and dlm_lock.
 */
static void osc_lock_detach(const struct lu_env *env, struct osc_lock *olck)
{
	struct ldlm_lock *dlmlock;

	spin_lock(&osc_ast_guard);
	dlmlock = olck->ols_lock;
	if (!dlmlock) {
		spin_unlock(&osc_ast_guard);
		return;
	}

	olck->ols_lock = NULL;
	/* wb(); --- for all who check (ols->ols_lock != NULL) before
	 * calling osc_lock_detach()
	 */
	dlmlock->l_ast_data = NULL;
	olck->ols_handle.cookie = 0ULL;
	spin_unlock(&osc_ast_guard);

	lock_res_and_lock(dlmlock);
	if (dlmlock->l_granted_mode == dlmlock->l_req_mode) {
		struct cl_object *obj = olck->ols_cl.cls_obj;
		struct cl_attr *attr = &osc_env_info(env)->oti_attr;
		__u64 old_kms;

		cl_object_attr_lock(obj);
		/* Must get the value under the lock to avoid possible races. */
		old_kms = cl2osc(obj)->oo_oinfo->loi_kms;
		/* Update the kms. Need to loop all granted locks.
		 * Not a problem for the client
		 */
		attr->cat_kms = ldlm_extent_shift_kms(dlmlock, old_kms);

		cl_object_attr_set(env, obj, attr, CAT_KMS);
		cl_object_attr_unlock(obj);
	}
	unlock_res_and_lock(dlmlock);

	/* release a reference taken in osc_lock_upcall0(). */
	LASSERT(olck->ols_has_ref);
	lu_ref_del(&dlmlock->l_reference, "osc_lock", olck);
	LDLM_LOCK_RELEASE(dlmlock);
	olck->ols_has_ref = 0;
}

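/*
 * Releases the mode reference ("hold") that this osc_lock has on the
 * underlying ldlm lock, if any, by way of osc_cancel_base().
 */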
static int osc_lock_unhold(struct osc_lock *ols)
{
	int result = 0;

	if (ols->ols_hold) {
		ols->ols_hold = 0;
		result = osc_cancel_base(&ols->ols_handle,
					 ols->ols_einfo.ei_mode);
	}
	return result;
}

static int osc_lock_unuse(const struct lu_env *env,
			  const struct cl_lock_slice *slice)
{
	struct osc_lock *ols = cl2osc_lock(slice);

	LINVRNT(osc_lock_invariant(ols));

	switch (ols->ols_state) {
	case OLS_NEW:
		LASSERT(!ols->ols_hold);
		LASSERT(ols->ols_agl);
		return 0;
	case OLS_UPCALL_RECEIVED:
		osc_lock_unhold(ols);
	case OLS_ENQUEUED:
		LASSERT(!ols->ols_hold);
		osc_lock_detach(env, ols);
		ols->ols_state = OLS_NEW;
		return 0;
	case OLS_GRANTED:
		LASSERT(!ols->ols_glimpse);
		LASSERT(ols->ols_hold);
		/*
		 * Move lock into OLS_RELEASED state before calling
		 * osc_cancel_base() so that possible synchronous cancellation
		 * (that always happens e.g., for liblustre) sees that lock is
		 * released.
		 */
		ols->ols_state = OLS_RELEASED;
		return osc_lock_unhold(ols);
	default:
		CERROR("Impossible state: %d\n", ols->ols_state);
		LBUG();
	}
}

static void osc_lock_fini(const struct lu_env *env,
			  struct cl_lock_slice *slice)
{
	struct osc_lock *ols = cl2osc_lock(slice);

	LINVRNT(osc_lock_invariant(ols));
	/*
	 * ->ols_hold can still be true at this point if, for example, a
	 * thread that requested a lock was killed (and released a reference
	 * to the lock), before reply from a server was received. In this case
	 * lock is destroyed immediately after upcall.
	 */
	osc_lock_unhold(ols);
	LASSERT(!ols->ols_lock);
	LASSERT(atomic_read(&ols->ols_pageref) == 0 ||
		atomic_read(&ols->ols_pageref) == _PAGEREF_MAGIC);

	kmem_cache_free(osc_lock_kmem, ols);
}

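/*
 * Translates the cl_lock extent description into the ldlm policy data used
 * for enqueue; the group id is carried over for group (CLM_GROUP) locks.
 */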
static void osc_lock_build_policy(const struct lu_env *env,
				  const struct cl_lock *lock,
				  ldlm_policy_data_t *policy)
{
	const struct cl_lock_descr *d = &lock->cll_descr;

	osc_index2policy(policy, d->cld_obj, d->cld_start, d->cld_end);
	policy->l_extent.gid = d->cld_gid;
}

static __u64 osc_enq2ldlm_flags(__u32 enqflags)
{
	__u64 result = 0;

	LASSERT((enqflags & ~CEF_MASK) == 0);

	if (enqflags & CEF_NONBLOCK)
		result |= LDLM_FL_BLOCK_NOWAIT;
	if (enqflags & CEF_ASYNC)
		result |= LDLM_FL_HAS_INTENT;
	if (enqflags & CEF_DISCARD_DATA)
		result |= LDLM_FL_AST_DISCARD_DATA;
	return result;
}

/**
 * Global spin-lock protecting consistency of ldlm_lock::l_ast_data
 * pointers. Initialized in osc_init().
 */
spinlock_t osc_ast_guard;

static struct osc_lock *osc_ast_data_get(struct ldlm_lock *dlm_lock)
{
	struct osc_lock *olck;

	lock_res_and_lock(dlm_lock);
	spin_lock(&osc_ast_guard);
	olck = dlm_lock->l_ast_data;
	if (olck) {
		struct cl_lock *lock = olck->ols_cl.cls_lock;
		/*
		 * If osc_lock holds a reference on ldlm lock, return it even
		 * when cl_lock is in CLS_FREEING state. This way
		 *
		 *	 osc_ast_data_get(dlmlock) == NULL
		 *
		 * guarantees that all osc references on dlmlock were
		 * released. osc_dlm_blocking_ast0() relies on that.
		 */
		if (lock->cll_state < CLS_FREEING || olck->ols_has_ref) {
			cl_lock_get_trust(lock);
			lu_ref_add_atomic(&lock->cll_reference,
					  "ast", current);
		} else
			olck = NULL;
	}
	spin_unlock(&osc_ast_guard);
	unlock_res_and_lock(dlm_lock);
	return olck;
}

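/*
 * Releases the references taken by a successful osc_ast_data_get(): the
 * trusted cl_lock reference and the matching lu_ref tag.
 */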
static void osc_ast_data_put(const struct lu_env *env, struct osc_lock *olck)
{
	struct cl_lock *lock;

	lock = olck->ols_cl.cls_lock;
	lu_ref_del(&lock->cll_reference, "ast", current);
	cl_lock_put(env, lock);
}

/**
 * Updates object attributes from a lock value block (lvb) received together
 * with the DLM lock reply from the server. Copy of osc_update_enqueue()
 * logic.
 *
 * This can be optimized to not update attributes when lock is a result of a
 * local match.
 *
 * Called under lock and resource spin-locks.
 */
static void osc_lock_lvb_update(const struct lu_env *env, struct osc_lock *olck,
				int rc)
{
	struct ost_lvb *lvb;
	struct cl_object *obj;
	struct lov_oinfo *oinfo;
	struct cl_attr *attr;
	unsigned valid;

	if (!(olck->ols_flags & LDLM_FL_LVB_READY))
		return;

	lvb = &olck->ols_lvb;
	obj = olck->ols_cl.cls_obj;
	oinfo = cl2osc(obj)->oo_oinfo;
	attr = &osc_env_info(env)->oti_attr;
	valid = CAT_BLOCKS | CAT_ATIME | CAT_CTIME | CAT_MTIME | CAT_SIZE;
	cl_lvb2attr(attr, lvb);

	cl_object_attr_lock(obj);
	if (rc == 0) {
		struct ldlm_lock *dlmlock;
		__u64 size;

		dlmlock = olck->ols_lock;

		/* re-grab LVB from a dlm lock under DLM spin-locks. */
		*lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
		size = lvb->lvb_size;
		/* Extend KMS up to the end of this lock and no further
		 * A lock on [x,y] means a KMS of up to y + 1 bytes!
		 */
		if (size > dlmlock->l_policy_data.l_extent.end)
			size = dlmlock->l_policy_data.l_extent.end + 1;
		if (size >= oinfo->loi_kms) {
			LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu, kms=%llu",
				   lvb->lvb_size, size);
			valid |= CAT_KMS;
			attr->cat_kms = size;
		} else {
			LDLM_DEBUG(dlmlock, "lock acquired, setting rss=%llu; leaving kms=%llu, end=%llu",
				   lvb->lvb_size, oinfo->loi_kms,
				   dlmlock->l_policy_data.l_extent.end);
		}
		ldlm_lock_allow_match_locked(dlmlock);
	} else if (rc == -ENAVAIL && olck->ols_glimpse) {
		CDEBUG(D_INODE, "glimpsed, setting rss=%llu; leaving kms=%llu\n",
		       lvb->lvb_size, oinfo->loi_kms);
	} else
		valid = 0;

	if (valid != 0)
		cl_object_attr_set(env, obj, attr, valid);

	cl_object_attr_unlock(obj);
}

/**
 * Called when a lock is granted, from an upcall (when server returned a
 * granted lock), or from completion AST, when server returned a blocked lock.
 *
 * Called under lock and resource spin-locks, that are released temporarily
 * here.
 */
static void osc_lock_granted(const struct lu_env *env, struct osc_lock *olck,
			     struct ldlm_lock *dlmlock, int rc)
{
	struct ldlm_extent *ext;
	struct cl_lock *lock;
	struct cl_lock_descr *descr;

	LASSERT(dlmlock->l_granted_mode == dlmlock->l_req_mode);

	if (olck->ols_state < OLS_GRANTED) {
		lock = olck->ols_cl.cls_lock;
		ext = &dlmlock->l_policy_data.l_extent;
		descr = &osc_env_info(env)->oti_descr;
		descr->cld_obj = lock->cll_descr.cld_obj;

		/* XXX check that ->l_granted_mode is valid. */
		descr->cld_mode = osc_ldlm2cl_lock(dlmlock->l_granted_mode);
		descr->cld_start = cl_index(descr->cld_obj, ext->start);
		descr->cld_end = cl_index(descr->cld_obj, ext->end);
		descr->cld_gid = ext->gid;
		/*
		 * tell upper layers the extent of the lock that was actually
		 * granted
		 */
		olck->ols_state = OLS_GRANTED;
		osc_lock_lvb_update(env, olck, rc);

		/* release DLM spin-locks to allow cl_lock_{modify,signal}()
		 * to take a semaphore on a parent lock. This is safe, because
		 * spin-locks are needed to protect consistency of
		 * dlmlock->l_*_mode and LVB, and we have finished processing
		 * them.
		 */
		unlock_res_and_lock(dlmlock);
		cl_lock_modify(env, lock, descr);
		cl_lock_signal(env, lock);
		LINVRNT(osc_lock_invariant(olck));
		lock_res_and_lock(dlmlock);
	}
}

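/**
 * Binds the ldlm lock produced by enqueue to its osc_lock: publishes it in
 * ->ols_lock, takes a mode reference (->ols_hold) and a long-term lock
 * reference, and, if the lock is already granted, finishes granting via
 * osc_lock_granted().
 */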
static void osc_lock_upcall0(const struct lu_env *env, struct osc_lock *olck)
{
	struct ldlm_lock *dlmlock;

	dlmlock = ldlm_handle2lock_long(&olck->ols_handle, 0);
	LASSERT(dlmlock);

	lock_res_and_lock(dlmlock);
	spin_lock(&osc_ast_guard);
	LASSERT(dlmlock->l_ast_data == olck);
	LASSERT(!olck->ols_lock);
	olck->ols_lock = dlmlock;
	spin_unlock(&osc_ast_guard);

	/*
	 * Lock might not be granted yet. In this case, completion ast
	 * (osc_ldlm_completion_ast()) comes later and finishes lock
	 * granting.
	 */
	if (dlmlock->l_granted_mode == dlmlock->l_req_mode)
		osc_lock_granted(env, olck, dlmlock, 0);
	unlock_res_and_lock(dlmlock);

	/*
	 * osc_enqueue_interpret() decrefs asynchronous locks; counter
	 * that here.
	 */
	ldlm_lock_addref(&olck->ols_handle, olck->ols_einfo.ei_mode);
	olck->ols_hold = 1;

	/* lock reference taken by ldlm_handle2lock_long() is owned by
	 * osc_lock and released in osc_lock_detach()
	 */
	lu_ref_add(&dlmlock->l_reference, "osc_lock", olck);
	olck->ols_has_ref = 1;
}

/**
 * Lock upcall function that is executed either when a reply to ENQUEUE rpc is
 * received from a server, or after osc_enqueue_base() matched a local DLM
 * lock.
 */
static int osc_lock_upcall(void *cookie, int errcode)
{
	struct osc_lock *olck = cookie;
	struct cl_lock_slice *slice = &olck->ols_cl;
	struct cl_lock *lock = slice->cls_lock;
	struct lu_env *env;
	struct cl_env_nest nest;

	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		int rc;

		cl_lock_mutex_get(env, lock);

		LASSERT(lock->cll_state >= CLS_QUEUING);
		if (olck->ols_state == OLS_ENQUEUED) {
			olck->ols_state = OLS_UPCALL_RECEIVED;
			rc = ldlm_error2errno(errcode);
		} else if (olck->ols_state == OLS_CANCELLED) {
			rc = -EIO;
		} else {
			CERROR("Impossible state: %d\n", olck->ols_state);
			LBUG();
		}
		if (rc) {
			struct ldlm_lock *dlmlock;

			dlmlock = ldlm_handle2lock(&olck->ols_handle);
			if (dlmlock) {
				lock_res_and_lock(dlmlock);
				spin_lock(&osc_ast_guard);
				LASSERT(!olck->ols_lock);
				dlmlock->l_ast_data = NULL;
				olck->ols_handle.cookie = 0ULL;
				spin_unlock(&osc_ast_guard);
				ldlm_lock_fail_match_locked(dlmlock);
				unlock_res_and_lock(dlmlock);
				LDLM_LOCK_PUT(dlmlock);
			}
		} else {
			if (olck->ols_glimpse)
				olck->ols_glimpse = 0;
			osc_lock_upcall0(env, olck);
		}

		/* Error handling, some errors are tolerable. */
		if (olck->ols_locklessable && rc == -EUSERS) {
			/* This is a tolerable error, turn this lock into
			 * lockless lock.
			 */
			osc_object_set_contended(cl2osc(slice->cls_obj));
			LASSERT(slice->cls_ops == &osc_lock_ops);

			/* Change this lock to ldlmlock-less lock. */
			osc_lock_to_lockless(env, olck, 1);
			olck->ols_state = OLS_GRANTED;
			rc = 0;
		} else if (olck->ols_glimpse && rc == -ENAVAIL) {
			osc_lock_lvb_update(env, olck, rc);
			cl_lock_delete(env, lock);
			/* Hide the error. */
			rc = 0;
		}

		if (rc == 0) {
			/* For the AGL case, the RPC sponsor may exit the
			 * cl_lock processing without calling wait() before
			 * the related OSC lock upcall(). So update the lock
			 * status according to the enqueue result inside the
			 * AGL upcall().
			 */
			if (olck->ols_agl) {
				lock->cll_flags |= CLF_FROM_UPCALL;
				cl_wait_try(env, lock);
				lock->cll_flags &= ~CLF_FROM_UPCALL;
				if (!olck->ols_glimpse)
					olck->ols_agl = 0;
			}
			cl_lock_signal(env, lock);
			/* del user for lock upcall cookie */
			cl_unuse_try(env, lock);
		} else {
			/* del user for lock upcall cookie */
			cl_lock_user_del(env, lock);
			cl_lock_error(env, lock, rc);
		}

		/* release cookie reference, acquired by osc_lock_enqueue() */
		cl_lock_hold_release(env, lock, "upcall", lock);
		cl_lock_mutex_put(env, lock);

		lu_ref_del(&lock->cll_reference, "upcall", lock);
		/* This may be the last reference, so it must be released
		 * after cl_lock_mutex_put().
		 */
		cl_lock_put(env, lock);

		cl_env_nested_put(&nest, env);
	} else {
		/* should never happen, similar to osc_ldlm_blocking_ast(). */
		LBUG();
	}
	return errcode;
}

/**
 * Core of osc_dlm_blocking_ast() logic.
 */
static void osc_lock_blocking(const struct lu_env *env,
			      struct ldlm_lock *dlmlock,
			      struct osc_lock *olck, int blocking)
{
	struct cl_lock *lock = olck->ols_cl.cls_lock;

	LASSERT(olck->ols_lock == dlmlock);
	CLASSERT(OLS_BLOCKED < OLS_CANCELLED);
	LASSERT(!osc_lock_is_lockless(olck));

	/*
	 * Lock might still be addref-ed here, if e.g., blocking ast
	 * is sent for a failed lock.
	 */
	osc_lock_unhold(olck);

	if (blocking && olck->ols_state < OLS_BLOCKED)
		/*
		 * Move osc_lock into OLS_BLOCKED before canceling the lock,
		 * because it recursively re-enters osc_lock_blocking(), with
		 * the state set to OLS_CANCELLED.
		 */
		olck->ols_state = OLS_BLOCKED;
	/*
	 * cancel and destroy lock at least once no matter how blocking ast is
	 * entered (see comment above osc_ldlm_blocking_ast() for use
	 * cases). cl_lock_cancel() and cl_lock_delete() are idempotent.
	 */
	cl_lock_cancel(env, lock);
	cl_lock_delete(env, lock);
}

/**
 * Helper for osc_dlm_blocking_ast() handling discrepancies between cl_lock
 * and ldlm_lock caches.
 */
static int osc_dlm_blocking_ast0(const struct lu_env *env,
				 struct ldlm_lock *dlmlock,
				 void *data, int flag)
{
	struct osc_lock *olck;
	struct cl_lock *lock;
	int result;
	int cancel;

	LASSERT(flag == LDLM_CB_BLOCKING || flag == LDLM_CB_CANCELING);

	cancel = 0;
	olck = osc_ast_data_get(dlmlock);
	if (olck) {
		lock = olck->ols_cl.cls_lock;
		cl_lock_mutex_get(env, lock);
		LINVRNT(osc_lock_invariant(olck));
		if (olck->ols_ast_wait) {
			/* wake up osc_lock_use() */
			cl_lock_signal(env, lock);
			olck->ols_ast_wait = 0;
		}
		/*
		 * Lock might have been canceled while this thread was
		 * sleeping on the lock mutex, but olck is pinned in memory.
		 */
		if (olck == dlmlock->l_ast_data) {
			/*
			 * NOTE: DLM sends blocking AST's for failed locks
			 *	 (that are still in pre-OLS_GRANTED state)
			 *	 too, and they have to be canceled otherwise
			 *	 DLM lock is never destroyed and stuck in
			 *	 memory.
			 *
			 *	 Alternatively, ldlm_cli_cancel() can be
			 *	 called here directly for osc_locks with
			 *	 ols_state < OLS_GRANTED to maintain an
			 *	 invariant that ->clo_cancel() is only called
			 *	 for locks that were granted.
			 */
			LASSERT(data == olck);
			osc_lock_blocking(env, dlmlock,
					  olck, flag == LDLM_CB_BLOCKING);
		} else
			cancel = 1;
		cl_lock_mutex_put(env, lock);
		osc_ast_data_put(env, olck);
	} else
		/*
		 * DLM lock exists, but there is no cl_lock attached to it.
		 * This is a `normal' race. cl_object and its cl_lock's can be
		 * removed by memory pressure, together with all pages.
		 */
		cancel = (flag == LDLM_CB_BLOCKING);

	if (cancel) {
		struct lustre_handle *lockh;

		lockh = &osc_env_info(env)->oti_handle;
		ldlm_lock2handle(dlmlock, lockh);
		result = ldlm_cli_cancel(lockh, LCF_ASYNC);
	} else
		result = 0;
	return result;
}

/**
 * Blocking ast invoked by ldlm when dlm lock is either blocking progress of
 * some other lock, or is canceled. This function is installed as a
 * ldlm_lock::l_blocking_ast() for client extent locks.
 *
 * Control flow is tricky, because ldlm uses the same call-back
 * (ldlm_lock::l_blocking_ast()) for both blocking and cancellation ast's.
 *
 * \param dlmlock lock for which ast occurred.
 *
 * \param new description of a conflicting lock in case of blocking ast.
 *
 * \param data value of dlmlock->l_ast_data
 *
 * \param flag LDLM_CB_BLOCKING or LDLM_CB_CANCELING. Used to distinguish
 *	       cancellation and blocking ast's.
 *
 * Possible use cases:
 *
 * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING) to cancel
 *   lock due to lock lru pressure, or explicit user request to purge
 *   locks.
 *
 * - ldlm calls dlmlock->l_blocking_ast(..., LDLM_CB_BLOCKING) to notify
 *   us that dlmlock conflicts with another lock that some client is
 *   enqueuing. Lock is canceled.
 *
 * - cl_lock_cancel() is called. osc_lock_cancel() calls
 *   ldlm_cli_cancel() that calls
 *
 *	 dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING)
 *
 *   recursively entering osc_ldlm_blocking_ast().
 *
 * - client cancels lock voluntarily (e.g., as a part of early cancellation):
 *
 *	 cl_lock_cancel()->
 *	   osc_lock_cancel()->
 *	     ldlm_cli_cancel()->
 *	       dlmlock->l_blocking_ast(..., LDLM_CB_CANCELING)
 *
 */
static int osc_ldlm_blocking_ast(struct ldlm_lock *dlmlock,
				 struct ldlm_lock_desc *new, void *data,
				 int flag)
{
	struct lu_env *env;
	struct cl_env_nest nest;
	int result;

	/*
	 * This can be called in the context of outer IO, e.g.,
	 *
	 *     cl_enqueue()->...
	 *	 ->osc_enqueue_base()->...
	 *	   ->ldlm_prep_elc_req()->...
	 *	     ->ldlm_cancel_callback()->...
	 *	       ->osc_ldlm_blocking_ast()
	 *
	 * A new environment has to be created to not corrupt the outer
	 * context.
	 */
	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		result = osc_dlm_blocking_ast0(env, dlmlock, data, flag);
		cl_env_nested_put(&nest, env);
	} else {
		result = PTR_ERR(env);
		/*
		 * XXX This should never happen, as cl_lock is
		 * stuck. Pre-allocated environment a la vvp_inode_fini_env
		 * should be used.
		 */
		LBUG();
	}
	if (result != 0) {
		if (result == -ENODATA)
			result = 0;
		else
			CERROR("BAST failed: %d\n", result);
	}
	return result;
}

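/**
 * Completion AST, invoked when the enqueue completes (possibly after the
 * lock was blocked server-side): finishes the dlm part of the work with
 * ldlm_completion_ast_async(), then copies the LVB into the osc_lock and
 * updates the cl_lock state if the osc_lock is already bound.
 */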
static int osc_ldlm_completion_ast(struct ldlm_lock *dlmlock,
				   __u64 flags, void *data)
{
	struct cl_env_nest nest;
	struct lu_env *env;
	struct osc_lock *olck;
	struct cl_lock *lock;
	int result;
	int dlmrc;

	/* first, do dlm part of the work */
	dlmrc = ldlm_completion_ast_async(dlmlock, flags, data);
	/* then, notify cl_lock */
	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		olck = osc_ast_data_get(dlmlock);
		if (olck) {
			lock = olck->ols_cl.cls_lock;
			cl_lock_mutex_get(env, lock);
			/*
			 * ldlm_handle_cp_callback() copied LVB from request
			 * to lock->l_lvb_data, store it in osc_lock.
			 */
			LASSERT(dlmlock->l_lvb_data);
			lock_res_and_lock(dlmlock);
			olck->ols_lvb = *(struct ost_lvb *)dlmlock->l_lvb_data;
			if (!olck->ols_lock) {
				/*
				 * upcall (osc_lock_upcall()) hasn't yet been
				 * called. Do nothing now, upcall will bind
				 * olck to dlmlock and signal the waiters.
				 *
				 * This maintains an invariant that osc_lock
				 * and ldlm_lock are always bound when
				 * osc_lock is in OLS_GRANTED state.
				 */
			} else if (dlmlock->l_granted_mode ==
				   dlmlock->l_req_mode) {
				osc_lock_granted(env, olck, dlmlock, dlmrc);
			}
			unlock_res_and_lock(dlmlock);

			if (dlmrc != 0) {
				CL_LOCK_DEBUG(D_ERROR, env, lock,
					      "dlmlock returned %d\n", dlmrc);
				cl_lock_error(env, lock, dlmrc);
			}
			cl_lock_mutex_put(env, lock);
			osc_ast_data_put(env, olck);
			result = 0;
		} else
			result = -ELDLM_NO_LOCK_DATA;
		cl_env_nested_put(&nest, env);
	} else
		result = PTR_ERR(env);
	return dlmrc ?: result;
}

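/**
 * Glimpse AST, invoked when the server needs this client's view of the
 * object's lock value block (e.g., its current size): packs the LVB into
 * the reply without taking the cl_lock mutex (see LU-1274 below).
 */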
static int osc_ldlm_glimpse_ast(struct ldlm_lock *dlmlock, void *data)
{
	struct ptlrpc_request *req = data;
	struct osc_lock *olck;
	struct cl_lock *lock;
	struct cl_object *obj;
	struct cl_env_nest nest;
	struct lu_env *env;
	struct ost_lvb *lvb;
	struct req_capsule *cap;
	int result;

	LASSERT(lustre_msg_get_opc(req->rq_reqmsg) == LDLM_GL_CALLBACK);

	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		/* osc_ast_data_get() has to go after the environment is
		 * allocated, because it acquires a reference to a lock,
		 * and that reference can only be released within an
		 * environment.
		 */
		olck = osc_ast_data_get(dlmlock);
		if (olck) {
			lock = olck->ols_cl.cls_lock;
			/* Do not grab the mutex of cl_lock for glimpse.
			 * See LU-1274 for details.
			 * BTW, it's okay for cl_lock to be cancelled during
			 * this period because server can handle this race.
			 * See ldlm_server_glimpse_ast() for details.
			 * cl_lock_mutex_get(env, lock);
			 */
			cap = &req->rq_pill;
			req_capsule_extend(cap, &RQF_LDLM_GL_CALLBACK);
			req_capsule_set_size(cap, &RMF_DLM_LVB, RCL_SERVER,
					     sizeof(*lvb));
			result = req_capsule_server_pack(cap);
			if (result == 0) {
				lvb = req_capsule_server_get(cap, &RMF_DLM_LVB);
				obj = lock->cll_descr.cld_obj;
				result = cl_object_glimpse(env, obj, lvb);
			}
			if (!exp_connect_lvb_type(req->rq_export))
				req_capsule_shrink(&req->rq_pill,
						   &RMF_DLM_LVB,
						   sizeof(struct ost_lvb_v1),
						   RCL_SERVER);
			osc_ast_data_put(env, olck);
		} else {
			/*
			 * These errors are normal races, so we don't want to
			 * fill the console with messages by calling
			 * ptlrpc_error()
			 */
			lustre_pack_reply(req, 1, NULL, NULL);
			result = -ELDLM_NO_LOCK_DATA;
		}
		cl_env_nested_put(&nest, env);
	} else
		result = PTR_ERR(env);
	req->rq_status = result;
	return result;
}

static unsigned long osc_lock_weigh(const struct lu_env *env,
				    const struct cl_lock_slice *slice)
{
	/*
	 * No need to grab coh_page_guard, since we don't care about the
	 * exact number of pages.
	 */
	return cl_object_header(slice->cls_obj)->coh_pages;
}

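/**
 * Fills struct ldlm_enqueue_info for this lock: extent lock type, requested
 * mode (phantom/glimpse locks are enqueued as reads for now) and the three
 * OSC ASTs, with the osc_lock itself to be stored in ->l_ast_data.
 */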
static void osc_lock_build_einfo(const struct lu_env *env,
				 const struct cl_lock *clock,
				 struct osc_lock *lock,
				 struct ldlm_enqueue_info *einfo)
{
	enum cl_lock_mode mode;

	mode = clock->cll_descr.cld_mode;
	if (mode == CLM_PHANTOM)
		/*
		 * For now, enqueue all glimpse locks in read mode. In the
		 * future, client might choose to enqueue LCK_PW lock for
		 * glimpse on a file opened for write.
		 */
		mode = CLM_READ;

	einfo->ei_type = LDLM_EXTENT;
	einfo->ei_mode = osc_cl_lock2ldlm(mode);
	einfo->ei_cb_bl = osc_ldlm_blocking_ast;
	einfo->ei_cb_cp = osc_ldlm_completion_ast;
	einfo->ei_cb_gl = osc_ldlm_glimpse_ast;
	einfo->ei_cbdata = lock; /* value to be put into ->l_ast_data */
}

/**
 * Determine if the lock should be converted into a lockless lock.
 *
 * Steps to check:
 * - if the lock has an explicit requirement for a non-lockless lock;
 * - check the io lock request type ci_lockreq;
 * - send the enqueue rpc to ost to make the further decision;
 * - special treatment for lockless truncate.
 *
 * Additional policy can be implemented here, e.g., never do lockless-io
 * for large extents.
 */
static void osc_lock_to_lockless(const struct lu_env *env,
				 struct osc_lock *ols, int force)
{
	struct cl_lock_slice *slice = &ols->ols_cl;

	LASSERT(ols->ols_state == OLS_NEW ||
		ols->ols_state == OLS_UPCALL_RECEIVED);

	if (force) {
		ols->ols_locklessable = 1;
		slice->cls_ops = &osc_lock_lockless_ops;
	} else {
		struct osc_io *oio = osc_env_io(env);
		struct cl_io *io = oio->oi_cl.cis_io;
		struct cl_object *obj = slice->cls_obj;
		struct osc_object *oob = cl2osc(obj);
		const struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);
		struct obd_connect_data *ocd;

		LASSERT(io->ci_lockreq == CILR_MANDATORY ||
			io->ci_lockreq == CILR_MAYBE ||
			io->ci_lockreq == CILR_NEVER);

		ocd = &class_exp2cliimp(osc_export(oob))->imp_connect_data;
		ols->ols_locklessable = (io->ci_type != CIT_SETATTR) &&
					(io->ci_lockreq == CILR_MAYBE) &&
					(ocd->ocd_connect_flags & OBD_CONNECT_SRVLOCK);
		if (io->ci_lockreq == CILR_NEVER ||
		    /* lockless IO */
		    (ols->ols_locklessable && osc_object_is_contended(oob)) ||
		    /* lockless truncate */
		    (cl_io_is_trunc(io) &&
		     (ocd->ocd_connect_flags & OBD_CONNECT_TRUNCLOCK) &&
		     osd->od_lockless_truncate)) {
			ols->ols_locklessable = 1;
			slice->cls_ops = &osc_lock_lockless_ops;
		}
	}
	LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
}

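/**
 * Returns true iff the lock being enqueued ("qing") can coexist with an
 * already queued lock ("qed"): both are reads, or "qed" is a glimpse lock
 * that has already received its upcall or is queued against a read.
 */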
static int osc_lock_compatible(const struct osc_lock *qing,
			       const struct osc_lock *qed)
{
	enum cl_lock_mode qing_mode;
	enum cl_lock_mode qed_mode;

	qing_mode = qing->ols_cl.cls_lock->cll_descr.cld_mode;
	if (qed->ols_glimpse &&
	    (qed->ols_state >= OLS_UPCALL_RECEIVED || qing_mode == CLM_READ))
		return 1;

	qed_mode = qed->ols_cl.cls_lock->cll_descr.cld_mode;
	return ((qing_mode == CLM_READ) && (qed_mode == CLM_READ));
}

/**
 * Cancel all conflicting locks and wait for them to be destroyed.
 *
 * This function is used for two purposes:
 *
 * - early cancel all conflicting locks before starting IO, and
 *
 * - guarantee that pages added to the page cache by lockless IO are never
 *   covered by locks other than lockless IO lock, and, hence, are not
 *   visible to other threads.
 */
static int osc_lock_enqueue_wait(const struct lu_env *env,
				 const struct osc_lock *olck)
{
	struct cl_lock *lock = olck->ols_cl.cls_lock;
	struct cl_lock_descr *descr = &lock->cll_descr;
	struct cl_object_header *hdr = cl_object_header(descr->cld_obj);
	struct cl_lock *scan;
	struct cl_lock *conflict = NULL;
	int lockless = osc_lock_is_lockless(olck);
	int rc = 0;

	LASSERT(cl_lock_is_mutexed(lock));

	/* make it enqueue anyway for glimpse lock, because we actually
	 * don't need to cancel any conflicting locks.
	 */
	if (olck->ols_glimpse)
		return 0;

	spin_lock(&hdr->coh_lock_guard);
	list_for_each_entry(scan, &hdr->coh_locks, cll_linkage) {
		struct cl_lock_descr *cld = &scan->cll_descr;
		const struct osc_lock *scan_ols;

		if (scan == lock)
			break;

		if (scan->cll_state < CLS_QUEUING ||
		    scan->cll_state == CLS_FREEING ||
		    cld->cld_start > descr->cld_end ||
		    cld->cld_end < descr->cld_start)
			continue;

		/* overlapped and living locks. */

		/* We're not supposed to give up group lock. */
		if (scan->cll_descr.cld_mode == CLM_GROUP) {
			LASSERT(descr->cld_mode != CLM_GROUP ||
				descr->cld_gid != scan->cll_descr.cld_gid);
			continue;
		}

		scan_ols = osc_lock_at(scan);

		/* We need to cancel the compatible locks if we're enqueuing
		 * a lockless lock, for example:
		 * imagine that client has PR lock on [0, 1000], and thread T0
		 * is doing lockless IO in [500, 1500] region. Concurrent
		 * thread T1 can see lockless data in [500, 1000], which is
		 * wrong, because these data are possibly stale.
		 */
		if (!lockless && osc_lock_compatible(olck, scan_ols))
			continue;

		cl_lock_get_trust(scan);
		conflict = scan;
		break;
	}
	spin_unlock(&hdr->coh_lock_guard);

	if (conflict) {
		if (lock->cll_descr.cld_mode == CLM_GROUP) {
			/* we want a group lock but a previous lock request
			 * conflicts, so we do not wait but return 0 so the
			 * request is sent to the server
			 */
			CDEBUG(D_DLMTRACE, "group lock %p is conflicted with %p, no wait, send to server\n",
			       lock, conflict);
			cl_lock_put(env, conflict);
			rc = 0;
		} else {
			CDEBUG(D_DLMTRACE, "lock %p is conflicted with %p, will wait\n",
			       lock, conflict);
			LASSERT(!lock->cll_conflict);
			lu_ref_add(&conflict->cll_reference, "cancel-wait",
				   lock);
			lock->cll_conflict = conflict;
			rc = CLO_WAIT;
		}
	}
	return rc;
}

/**
 * Implementation of cl_lock_operations::clo_enqueue() method for osc
 * layer. This initiates ldlm enqueue:
 *
 * - cancels conflicting locks early (osc_lock_enqueue_wait());
 *
 * - calls osc_enqueue_base() to do actual enqueue.
 *
 * osc_enqueue_base() is supplied with an upcall function that is executed
 * when lock is received either after a local cached ldlm lock is matched, or
 * when a reply from the server is received.
 *
 * This function does not wait for the network communication to complete.
 */
static int osc_lock_enqueue(const struct lu_env *env,
			    const struct cl_lock_slice *slice,
			    struct cl_io *unused, __u32 enqflags)
{
	struct osc_lock *ols = cl2osc_lock(slice);
	struct cl_lock *lock = ols->ols_cl.cls_lock;
	int result;

	LASSERT(cl_lock_is_mutexed(lock));
	LASSERTF(ols->ols_state == OLS_NEW,
		 "Impossible state: %d\n", ols->ols_state);

	LASSERTF(ergo(ols->ols_glimpse, lock->cll_descr.cld_mode <= CLM_READ),
		 "lock = %p, ols = %p\n", lock, ols);

	result = osc_lock_enqueue_wait(env, ols);
	if (result == 0) {
		if (!osc_lock_is_lockless(ols)) {
			struct osc_object *obj = cl2osc(slice->cls_obj);
			struct osc_thread_info *info = osc_env_info(env);
			struct ldlm_res_id *resname = &info->oti_resname;
			ldlm_policy_data_t *policy = &info->oti_policy;
			struct ldlm_enqueue_info *einfo = &ols->ols_einfo;

			/* lock will be passed as upcall cookie,
			 * hold a ref to prevent it from being released.
			 */
			cl_lock_hold_add(env, lock, "upcall", lock);
			/* a user for lock also */
			cl_lock_user_add(env, lock);
			ols->ols_state = OLS_ENQUEUED;

			/*
			 * XXX: this is possible blocking point as
			 * ldlm_lock_match(LDLM_FL_LVB_READY) waits for
			 * LDLM_CP_CALLBACK.
			 */
			ostid_build_res_name(&obj->oo_oinfo->loi_oi, resname);
			osc_lock_build_policy(env, lock, policy);
			result = osc_enqueue_base(osc_export(obj), resname,
						  &ols->ols_flags, policy,
						  &ols->ols_lvb,
						  obj->oo_oinfo->loi_kms_valid,
						  osc_lock_upcall,
						  ols, einfo, &ols->ols_handle,
						  PTLRPCD_SET, 1, ols->ols_agl);
			if (result != 0) {
				cl_lock_user_del(env, lock);
				cl_lock_unhold(env, lock, "upcall", lock);
				if (unlikely(result == -ECANCELED)) {
					ols->ols_state = OLS_NEW;
					result = 0;
				}
			}
		} else {
			ols->ols_state = OLS_GRANTED;
			ols->ols_owner = osc_env_io(env);
		}
	}
	LASSERT(ergo(ols->ols_glimpse, !osc_lock_is_lockless(ols)));
	return result;
}

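/**
 * Implementation of cl_lock_operations::clo_wait(): glimpse and AGL locks
 * that already received their upcall are completed (or re-enqueued for
 * AGL); otherwise the caller waits (CLO_WAIT) until the lock reaches
 * OLS_GRANTED or fails with ->cll_error.
 */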
static int osc_lock_wait(const struct lu_env *env,
			 const struct cl_lock_slice *slice)
{
	struct osc_lock *olck = cl2osc_lock(slice);
	struct cl_lock *lock = olck->ols_cl.cls_lock;

	LINVRNT(osc_lock_invariant(olck));

	if (olck->ols_glimpse && olck->ols_state >= OLS_UPCALL_RECEIVED) {
		if (olck->ols_flags & LDLM_FL_LVB_READY) {
			return 0;
		} else if (olck->ols_agl) {
			if (lock->cll_flags & CLF_FROM_UPCALL)
				/* It is from enqueue RPC reply upcall for
				 * updating state. Do not re-enqueue.
				 */
				return -ENAVAIL;
			olck->ols_state = OLS_NEW;
		} else {
			LASSERT(lock->cll_error);
			return lock->cll_error;
		}
	}

	if (olck->ols_state == OLS_NEW) {
		int rc;

		LASSERT(olck->ols_agl);
		olck->ols_agl = 0;
		olck->ols_flags &= ~LDLM_FL_BLOCK_NOWAIT;
		rc = osc_lock_enqueue(env, slice, NULL, CEF_ASYNC | CEF_MUST);
		if (rc != 0)
			return rc;
		else
			return CLO_REENQUEUED;
	}

	LASSERT(equi(olck->ols_state >= OLS_UPCALL_RECEIVED &&
		     lock->cll_error == 0, olck->ols_lock));

	return lock->cll_error ?: olck->ols_state >= OLS_GRANTED ? 0 : CLO_WAIT;
}

/**
 * An implementation of cl_lock_operations::clo_use() method that pins cached
 * lock.
 */
static int osc_lock_use(const struct lu_env *env,
			const struct cl_lock_slice *slice)
{
	struct osc_lock *olck = cl2osc_lock(slice);
	int rc;

	LASSERT(!olck->ols_hold);

	/*
	 * Atomically check for LDLM_FL_CBPENDING and addref a lock if this
	 * flag is not set. This protects us from a concurrent blocking ast.
	 */
	rc = ldlm_lock_addref_try(&olck->ols_handle, olck->ols_einfo.ei_mode);
	if (rc == 0) {
		olck->ols_hold = 1;
		olck->ols_state = OLS_GRANTED;
	} else {
		struct cl_lock *lock;

		/*
		 * Lock is being cancelled somewhere within
		 * ldlm_handle_bl_callback(): LDLM_FL_CBPENDING is already
		 * set, but osc_ldlm_blocking_ast() hasn't yet acquired
		 * cl_lock mutex.
		 */
		lock = slice->cls_lock;
		LASSERT(lock->cll_state == CLS_INTRANSIT);
		LASSERT(lock->cll_users > 0);
		/* set a flag for osc_dlm_blocking_ast0() to signal the
		 * lock.
		 */
		olck->ols_ast_wait = 1;
		rc = CLO_WAIT;
	}
	return rc;
}

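/**
 * Flushes pages covered by the lock: for write locks dirty pages are first
 * written back (or discarded when the server asked for LDLM_FL_DISCARD_DATA),
 * then all pages under the lock extent are dropped from the page cache.
 */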
static int osc_lock_flush(struct osc_lock *ols, int discard)
{
	struct cl_lock *lock = ols->ols_cl.cls_lock;
	struct cl_env_nest nest;
	struct lu_env *env;
	int result = 0;

	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		struct osc_object *obj = cl2osc(ols->ols_cl.cls_obj);
		struct cl_lock_descr *descr = &lock->cll_descr;
		int rc = 0;

		if (descr->cld_mode >= CLM_WRITE) {
			result = osc_cache_writeback_range(env, obj,
							   descr->cld_start,
							   descr->cld_end,
							   1, discard);
			LDLM_DEBUG(ols->ols_lock,
				   "lock %p: %d pages were %s.\n", lock, result,
				   discard ? "discarded" : "written");
			if (result > 0)
				result = 0;
		}

		rc = cl_lock_discard_pages(env, lock);
		if (result == 0 && rc < 0)
			result = rc;

		cl_env_nested_put(&nest, env);
	} else
		result = PTR_ERR(env);
	if (result == 0) {
		ols->ols_flush = 1;
		LINVRNT(!osc_lock_has_pages(ols));
	}
	return result;
}

/**
 * Implements cl_lock_operations::clo_cancel() method for osc layer. This is
 * called (as part of cl_lock_cancel()) when lock is canceled either
 * voluntarily (LRU pressure, early cancellation, umount, etc.) or due to a
 * conflict with some other lock somewhere in the cluster. This function does
 * the following:
 *
 * - invalidates all pages protected by this lock (after sending dirty
 *   ones to the server, as necessary);
 *
 * - decref's underlying ldlm lock;
 *
 * - cancels ldlm lock (ldlm_cli_cancel()).
 */
static void osc_lock_cancel(const struct lu_env *env,
			    const struct cl_lock_slice *slice)
{
	struct cl_lock *lock = slice->cls_lock;
	struct osc_lock *olck = cl2osc_lock(slice);
	struct ldlm_lock *dlmlock = olck->ols_lock;
	int result = 0;
	int discard;

	LASSERT(cl_lock_is_mutexed(lock));
	LINVRNT(osc_lock_invariant(olck));

	if (dlmlock) {
		int do_cancel;

		discard = !!(dlmlock->l_flags & LDLM_FL_DISCARD_DATA);
		if (olck->ols_state >= OLS_GRANTED)
			result = osc_lock_flush(olck, discard);
		osc_lock_unhold(olck);

		lock_res_and_lock(dlmlock);
		/* Now that we're the only user of dlm read/write reference,
		 * mostly the ->l_readers + ->l_writers should be zero.
		 * However, there is a corner case.
		 * See bug 18829 for details.
		 */
		do_cancel = (dlmlock->l_readers == 0 &&
			     dlmlock->l_writers == 0);
		dlmlock->l_flags |= LDLM_FL_CBPENDING;
		unlock_res_and_lock(dlmlock);
		if (do_cancel)
			result = ldlm_cli_cancel(&olck->ols_handle, LCF_ASYNC);
		if (result < 0)
			CL_LOCK_DEBUG(D_ERROR, env, lock,
				      "lock %p cancel failure with error(%d)\n",
				      lock, result);
	}
	olck->ols_state = OLS_CANCELLED;
	olck->ols_flags &= ~LDLM_FL_LVB_READY;
	osc_lock_detach(env, olck);
}

static int osc_lock_has_pages(struct osc_lock *olck)
{
	return 0;
}

static void osc_lock_delete(const struct lu_env *env,
			    const struct cl_lock_slice *slice)
{
	struct osc_lock *olck;

	olck = cl2osc_lock(slice);
	if (olck->ols_glimpse) {
		LASSERT(!olck->ols_hold);
		LASSERT(!olck->ols_lock);
		return;
	}

	LINVRNT(osc_lock_invariant(olck));
	LINVRNT(!osc_lock_has_pages(olck));

	osc_lock_unhold(olck);
	osc_lock_detach(env, olck);
}

/**
 * Implements cl_lock_operations::clo_state() method for osc layer.
 *
 * Maintains osc_lock::ols_owner field.
 *
 * This assumes that lock always enters CLS_HELD (from some other state) in
 * the same IO context as one that requested the lock. This should not be a
 * problem, because context is by definition shared by all activity pertaining
 * to the same high-level IO.
 */
static void osc_lock_state(const struct lu_env *env,
			   const struct cl_lock_slice *slice,
			   enum cl_lock_state state)
{
	struct osc_lock *lock = cl2osc_lock(slice);

	/*
	 * XXX multiple io contexts can use the lock at the same time.
	 */
	LINVRNT(osc_lock_invariant(lock));
	if (state == CLS_HELD && slice->cls_lock->cll_state != CLS_HELD) {
		struct osc_io *oio = osc_env_io(env);

		LASSERT(!lock->ols_owner);
		lock->ols_owner = oio;
	} else if (state != CLS_HELD)
		lock->ols_owner = NULL;
}

static int osc_lock_print(const struct lu_env *env, void *cookie,
			  lu_printer_t p, const struct cl_lock_slice *slice)
{
	struct osc_lock *lock = cl2osc_lock(slice);

	/*
	 * XXX print ldlm lock and einfo properly.
	 */
	(*p)(env, cookie, "%p %#16llx %#llx %d %p ",
	     lock->ols_lock, lock->ols_flags, lock->ols_handle.cookie,
	     lock->ols_state, lock->ols_owner);
	osc_lvb_print(env, cookie, p, &lock->ols_lvb);
	return 0;
}

static int osc_lock_fits_into(const struct lu_env *env,
			      const struct cl_lock_slice *slice,
			      const struct cl_lock_descr *need,
			      const struct cl_io *io)
{
	struct osc_lock *ols = cl2osc_lock(slice);

	if (need->cld_enq_flags & CEF_NEVER)
		return 0;

	if (ols->ols_state >= OLS_CANCELLED)
		return 0;

	if (need->cld_mode == CLM_PHANTOM) {
		if (ols->ols_agl)
			return !(ols->ols_state > OLS_RELEASED);

		/*
		 * Note: the QUEUED lock can't be matched here, otherwise
		 * it might cause deadlocks.
		 * In read_process,
		 * P1: enqueued read lock, create sublock1
		 * P2: enqueued write lock, create sublock2 (conflicts
		 *     with sublock1).
		 * P1: Grant read lock.
		 * P1: enqueued glimpse lock (while holding sublock1_read),
		 *     matched with sublock2, waiting sublock2 to be granted.
		 *     But sublock2 can not be granted, because P1
		 *     will not release sublock1. Bang!
		 */
		if (ols->ols_state < OLS_GRANTED ||
		    ols->ols_state > OLS_RELEASED)
			return 0;
	} else if (need->cld_enq_flags & CEF_MUST) {
		/*
		 * If the lock has never been enqueued, it can't be matched,
		 * because the enqueue process brings in much information
		 * which can be used to determine things such as lockless,
		 * CEF_MUST, etc.
		 */
		if (ols->ols_state < OLS_UPCALL_RECEIVED &&
		    ols->ols_locklessable)
			return 0;
	}
	return 1;
}

static const struct cl_lock_operations osc_lock_ops = {
	.clo_fini = osc_lock_fini,
	.clo_enqueue = osc_lock_enqueue,
	.clo_wait = osc_lock_wait,
	.clo_unuse = osc_lock_unuse,
	.clo_use = osc_lock_use,
	.clo_delete = osc_lock_delete,
	.clo_state = osc_lock_state,
	.clo_cancel = osc_lock_cancel,
	.clo_weigh = osc_lock_weigh,
	.clo_print = osc_lock_print,
	.clo_fits_into = osc_lock_fits_into,
};

static int osc_lock_lockless_unuse(const struct lu_env *env,
				   const struct cl_lock_slice *slice)
{
	struct osc_lock *ols = cl2osc_lock(slice);
	struct cl_lock *lock = slice->cls_lock;

	LASSERT(ols->ols_state == OLS_GRANTED);
	LINVRNT(osc_lock_invariant(ols));

	cl_lock_cancel(env, lock);
	cl_lock_delete(env, lock);
	return 0;
}

static void osc_lock_lockless_cancel(const struct lu_env *env,
				     const struct cl_lock_slice *slice)
{
	struct osc_lock *ols = cl2osc_lock(slice);
	int result;

	result = osc_lock_flush(ols, 0);
	if (result)
		CERROR("Pages for lockless lock %p were not purged(%d)\n",
		       ols, result);
	ols->ols_state = OLS_CANCELLED;
}

static int osc_lock_lockless_wait(const struct lu_env *env,
				  const struct cl_lock_slice *slice)
{
	struct osc_lock *olck = cl2osc_lock(slice);
	struct cl_lock *lock = olck->ols_cl.cls_lock;

	LINVRNT(osc_lock_invariant(olck));
	LASSERT(olck->ols_state >= OLS_UPCALL_RECEIVED);

	return lock->cll_error;
}

static void osc_lock_lockless_state(const struct lu_env *env,
				    const struct cl_lock_slice *slice,
				    enum cl_lock_state state)
{
	struct osc_lock *lock = cl2osc_lock(slice);

	LINVRNT(osc_lock_invariant(lock));
	if (state == CLS_HELD) {
		struct osc_io *oio = osc_env_io(env);

		LASSERT(ergo(lock->ols_owner, lock->ols_owner == oio));
		lock->ols_owner = oio;

		/* set the io to be lockless if this lock is for io's
		 * host object
		 */
		if (cl_object_same(oio->oi_cl.cis_obj, slice->cls_obj))
			oio->oi_lockless = 1;
	}
}

static int osc_lock_lockless_fits_into(const struct lu_env *env,
				       const struct cl_lock_slice *slice,
				       const struct cl_lock_descr *need,
				       const struct cl_io *io)
{
	struct osc_lock *lock = cl2osc_lock(slice);

	if (!(need->cld_enq_flags & CEF_NEVER))
		return 0;

	/* lockless lock should only be used by its owning io. b22147 */
	return (lock->ols_owner == osc_env_io(env));
}

static const struct cl_lock_operations osc_lock_lockless_ops = {
	.clo_fini = osc_lock_fini,
	.clo_enqueue = osc_lock_enqueue,
	.clo_wait = osc_lock_lockless_wait,
	.clo_unuse = osc_lock_lockless_unuse,
	.clo_state = osc_lock_lockless_state,
	.clo_fits_into = osc_lock_lockless_fits_into,
	.clo_cancel = osc_lock_lockless_cancel,
	.clo_print = osc_lock_print
};

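/**
 * Allocates and initializes the osc slice of a new cl_lock: translates
 * enqueue flags into LDLM flags, marks AGL/glimpse locks, attaches the
 * slice, and possibly converts the lock to lockless mode right away (see
 * osc_lock_to_lockless()).
 */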
int osc_lock_init(const struct lu_env *env,
		  struct cl_object *obj, struct cl_lock *lock,
		  const struct cl_io *unused)
{
	struct osc_lock *clk;
	int result;

	clk = kmem_cache_alloc(osc_lock_kmem, GFP_NOFS | __GFP_ZERO);
	if (clk) {
		__u32 enqflags = lock->cll_descr.cld_enq_flags;

		osc_lock_build_einfo(env, lock, clk, &clk->ols_einfo);
		atomic_set(&clk->ols_pageref, 0);
		clk->ols_state = OLS_NEW;

		clk->ols_flags = osc_enq2ldlm_flags(enqflags);
		clk->ols_agl = !!(enqflags & CEF_AGL);
		if (clk->ols_agl)
			clk->ols_flags |= LDLM_FL_BLOCK_NOWAIT;
		if (clk->ols_flags & LDLM_FL_HAS_INTENT)
			clk->ols_glimpse = 1;

		cl_lock_slice_add(lock, &clk->ols_cl, obj, &osc_lock_ops);

		if (!(enqflags & CEF_MUST))
			/* try to convert this lock to a lockless lock */
			osc_lock_to_lockless(env, clk, (enqflags & CEF_NEVER));
		if (clk->ols_locklessable && !(enqflags & CEF_DISCARD_DATA))
			clk->ols_flags |= LDLM_FL_DENY_ON_CONTENTION;

		LDLM_DEBUG_NOLOCK("lock %p, osc lock %p, flags %llx\n",
				  lock, clk, clk->ols_flags);

		result = 0;
	} else
		result = -ENOMEM;
	return result;
}

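/**
 * Tests whether any osc_page still references the osc_lock bound to "dlm":
 * atomic_add_return(_PAGEREF_MAGIC) yields exactly _PAGEREF_MAGIC only when
 * ->ols_pageref was zero. In that case the bias is left in place, which is
 * why osc_lock_fini() accepts a pageref of either 0 or _PAGEREF_MAGIC;
 * otherwise the bias is backed out and 1 is returned.
 */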
int osc_dlm_lock_pageref(struct ldlm_lock *dlm)
{
	struct osc_lock *olock;
	int rc = 0;

	spin_lock(&osc_ast_guard);
	olock = dlm->l_ast_data;
	/*
	 * There's a very rare race with osc_page_addref_lock(), but it
	 * doesn't matter: in the worst case we skip cancelling a lock
	 * that we actually could cancel, which is harmless.
	 */
	if (olock &&
	    atomic_add_return(_PAGEREF_MAGIC,
			      &olock->ols_pageref) != _PAGEREF_MAGIC) {
		atomic_sub(_PAGEREF_MAGIC, &olock->ols_pageref);
		rc = 1;
	}
	spin_unlock(&osc_ast_guard);
	return rc;
}

/** @} osc */