fs: dlm: fix race between test_bit() and queue_work()
fs/dlm/lock.c (linux-block.git)
// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) 2005-2010 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

/* Central locking logic has four stages:

   dlm_lock()
   dlm_unlock()

   request_lock(ls, lkb)
   convert_lock(ls, lkb)
   unlock_lock(ls, lkb)
   cancel_lock(ls, lkb)

   _request_lock(r, lkb)
   _convert_lock(r, lkb)
   _unlock_lock(r, lkb)
   _cancel_lock(r, lkb)

   do_request(r, lkb)
   do_convert(r, lkb)
   do_unlock(r, lkb)
   do_cancel(r, lkb)

   Stage 1 (lock, unlock) is mainly about checking input args and
   splitting into one of the four main operations:

   dlm_lock          = request_lock
   dlm_lock+CONVERT  = convert_lock
   dlm_unlock        = unlock_lock
   dlm_unlock+CANCEL = cancel_lock

   Stage 2, xxxx_lock(), just finds and locks the relevant rsb which is
   provided to the next stage.

   Stage 3, _xxxx_lock(), determines if the operation is local or remote.
   When remote, it calls send_xxxx(), when local it calls do_xxxx().

   Stage 4, do_xxxx(), is the guts of the operation.  It manipulates the
   given rsb and lkb and queues callbacks.

   For remote operations, send_xxxx() results in the corresponding do_xxxx()
   function being executed on the remote node.  The connecting send/receive
   calls on local (L) and remote (R) nodes:

   L: send_xxxx()              ->  R: receive_xxxx()
                                   R: do_xxxx()
   L: receive_xxxx_reply()     <-  R: send_xxxx_reply()
*/
#include <trace/events/dlm.h>

#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include "dlm_internal.h"
#include <linux/dlm_device.h>
#include "memory.h"
#include "midcomms.h"
#include "requestqueue.h"
#include "util.h"
#include "dir.h"
#include "member.h"
#include "lockspace.h"
#include "ast.h"
#include "lock.h"
#include "rcom.h"
#include "recover.h"
#include "lvb_table.h"
#include "user.h"
#include "config.h"

static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int send_remove(struct dlm_rsb *r);
static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                                    struct dlm_message *ms);
static int receive_extralen(struct dlm_message *ms);
static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
static void del_timeout(struct dlm_lkb *lkb);
static void toss_rsb(struct kref *kref);

/*
 * Lock compatibility matrix - thanks Steve
 * UN = Unlocked state. Not really a state, used as a flag
 * PD = Padding. Used to make the matrix a nice power of two in size
 * Other states are the same as the VMS DLM.
 * Usage: matrix[grmode+1][rqmode+1]  (although m[rq+1][gr+1] is the same)
 */

static const int __dlm_compat_matrix[8][8] = {
      /* UN NL CR CW PR PW EX PD */
        {1, 1, 1, 1, 1, 1, 1, 0},       /* UN */
        {1, 1, 1, 1, 1, 1, 1, 0},       /* NL */
        {1, 1, 1, 1, 1, 1, 0, 0},       /* CR */
        {1, 1, 1, 1, 0, 0, 0, 0},       /* CW */
        {1, 1, 1, 0, 1, 0, 0, 0},       /* PR */
        {1, 1, 1, 0, 0, 0, 0, 0},       /* PW */
        {1, 1, 0, 0, 0, 0, 0, 0},       /* EX */
        {0, 0, 0, 0, 0, 0, 0, 0}        /* PD */
};

/*
 * This defines the direction of transfer of LVB data.
 * Granted mode is the row; requested mode is the column.
 * Usage: matrix[grmode+1][rqmode+1]
 * 1 = LVB is returned to the caller
 * 0 = LVB is written to the resource
 * -1 = nothing happens to the LVB
 */

const int dlm_lvb_operations[8][8] = {
        /* UN   NL  CR  CW  PR  PW  EX  PD*/
        {  -1,  1,  1,  1,  1,  1,  1, -1 }, /* UN */
        {  -1,  1,  1,  1,  1,  1,  1,  0 }, /* NL */
        {  -1, -1,  1,  1,  1,  1,  1,  0 }, /* CR */
        {  -1, -1, -1,  1,  1,  1,  1,  0 }, /* CW */
        {  -1, -1, -1, -1,  1,  1,  1,  0 }, /* PR */
        {  -1,  0,  0,  0,  0,  0,  1,  0 }, /* PW */
        {  -1,  0,  0,  0,  0,  0,  0,  0 }, /* EX */
        {  -1,  0,  0,  0,  0,  0,  0,  0 }  /* PD */
};
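
/* Worked example of the table above (illustrative): a convert up from NL
 * (row NL) to EX (column EX) hits entry 1, so the resource's LVB is
 * copied back to the caller's lksb; a convert down from EX (row EX) to
 * PR (column PR) hits entry 0, so the caller's LVB is written to the
 * resource; entries of -1 (the whole UN column) leave the LVB untouched.
 */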

#define modes_compat(gr, rq) \
        __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1]

int dlm_modes_compat(int mode1, int mode2)
{
        return __dlm_compat_matrix[mode1 + 1][mode2 + 1];
}
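
/* Illustrative reading of __dlm_compat_matrix: two protected-read locks
 * can coexist, so dlm_modes_compat(DLM_LOCK_PR, DLM_LOCK_PR) returns 1,
 * while read and exclusive conflict, so
 * dlm_modes_compat(DLM_LOCK_PR, DLM_LOCK_EX) returns 0.  The matrix is
 * symmetric, so the argument order does not matter.
 */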

/*
 * Compatibility matrix for conversions with QUECVT set.
 * Granted mode is the row; requested mode is the column.
 * Usage: matrix[grmode+1][rqmode+1]
 */

static const int __quecvt_compat_matrix[8][8] = {
      /* UN NL CR CW PR PW EX PD */
        {0, 0, 0, 0, 0, 0, 0, 0},       /* UN */
        {0, 0, 1, 1, 1, 1, 1, 0},       /* NL */
        {0, 0, 0, 1, 1, 1, 1, 0},       /* CR */
        {0, 0, 0, 0, 1, 1, 1, 0},       /* CW */
        {0, 0, 0, 1, 0, 1, 1, 0},       /* PR */
        {0, 0, 0, 0, 0, 0, 1, 0},       /* PW */
        {0, 0, 0, 0, 0, 0, 0, 0},       /* EX */
        {0, 0, 0, 0, 0, 0, 0, 0}        /* PD */
};

void dlm_print_lkb(struct dlm_lkb *lkb)
{
        printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x "
               "sts %d rq %d gr %d wait_type %d wait_nodeid %d seq %llu\n",
               lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags,
               lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode,
               lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_wait_nodeid,
               (unsigned long long)lkb->lkb_recover_seq);
}

static void dlm_print_rsb(struct dlm_rsb *r)
{
        printk(KERN_ERR "rsb: nodeid %d master %d dir %d flags %lx first %x "
               "rlc %d name %s\n",
               r->res_nodeid, r->res_master_nodeid, r->res_dir_nodeid,
               r->res_flags, r->res_first_lkid, r->res_recover_locks_count,
               r->res_name);
}

void dlm_dump_rsb(struct dlm_rsb *r)
{
        struct dlm_lkb *lkb;

        dlm_print_rsb(r);

        printk(KERN_ERR "rsb: root_list empty %d recover_list empty %d\n",
               list_empty(&r->res_root_list), list_empty(&r->res_recover_list));
        printk(KERN_ERR "rsb lookup list\n");
        list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup)
                dlm_print_lkb(lkb);
        printk(KERN_ERR "rsb grant queue:\n");
        list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
                dlm_print_lkb(lkb);
        printk(KERN_ERR "rsb convert queue:\n");
        list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
                dlm_print_lkb(lkb);
        printk(KERN_ERR "rsb wait queue:\n");
        list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
                dlm_print_lkb(lkb);
}

/* Threads cannot use the lockspace while it's being recovered */

static inline void dlm_lock_recovery(struct dlm_ls *ls)
{
        down_read(&ls->ls_in_recovery);
}

void dlm_unlock_recovery(struct dlm_ls *ls)
{
        up_read(&ls->ls_in_recovery);
}

int dlm_lock_recovery_try(struct dlm_ls *ls)
{
        return down_read_trylock(&ls->ls_in_recovery);
}
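
/* Sketch of the intended usage (inferred from how the dlm_lock()/
 * dlm_unlock() entry points use these helpers): normal lock traffic
 * takes ls_in_recovery for read, so recovery, which takes it for write,
 * excludes all of it:
 *
 *      dlm_lock_recovery(ls);
 *      error = ...perform the lock/unlock operation...
 *      dlm_unlock_recovery(ls);
 */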

static inline int can_be_queued(struct dlm_lkb *lkb)
{
        return !(lkb->lkb_exflags & DLM_LKF_NOQUEUE);
}

static inline int force_blocking_asts(struct dlm_lkb *lkb)
{
        return (lkb->lkb_exflags & DLM_LKF_NOQUEUEBAST);
}

static inline int is_demoted(struct dlm_lkb *lkb)
{
        return (lkb->lkb_sbflags & DLM_SBF_DEMOTED);
}

static inline int is_altmode(struct dlm_lkb *lkb)
{
        return (lkb->lkb_sbflags & DLM_SBF_ALTMODE);
}

static inline int is_granted(struct dlm_lkb *lkb)
{
        return (lkb->lkb_status == DLM_LKSTS_GRANTED);
}

static inline int is_remote(struct dlm_rsb *r)
{
        DLM_ASSERT(r->res_nodeid >= 0, dlm_print_rsb(r););
        return !!r->res_nodeid;
}

static inline int is_process_copy(struct dlm_lkb *lkb)
{
        return (lkb->lkb_nodeid && !(lkb->lkb_flags & DLM_IFL_MSTCPY));
}

static inline int is_master_copy(struct dlm_lkb *lkb)
{
        return (lkb->lkb_flags & DLM_IFL_MSTCPY) ? 1 : 0;
}

static inline int middle_conversion(struct dlm_lkb *lkb)
{
        if ((lkb->lkb_grmode==DLM_LOCK_PR && lkb->lkb_rqmode==DLM_LOCK_CW) ||
            (lkb->lkb_rqmode==DLM_LOCK_PR && lkb->lkb_grmode==DLM_LOCK_CW))
                return 1;
        return 0;
}

static inline int down_conversion(struct dlm_lkb *lkb)
{
        return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode);
}
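
/* Illustrative classification: PR and CW are incomparable modes, so a
 * PR<->CW convert is "middle" (neither up nor down); EX -> PR lowers the
 * mode and is a down-conversion; NL -> EX is neither, i.e. an ordinary
 * up-conversion.
 */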

static inline int is_overlap_unlock(struct dlm_lkb *lkb)
{
        return lkb->lkb_flags & DLM_IFL_OVERLAP_UNLOCK;
}

static inline int is_overlap_cancel(struct dlm_lkb *lkb)
{
        return lkb->lkb_flags & DLM_IFL_OVERLAP_CANCEL;
}

static inline int is_overlap(struct dlm_lkb *lkb)
{
        return (lkb->lkb_flags & (DLM_IFL_OVERLAP_UNLOCK |
                                  DLM_IFL_OVERLAP_CANCEL));
}

static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
{
        if (is_master_copy(lkb))
                return;

        del_timeout(lkb);

        DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););

#ifdef CONFIG_DLM_DEPRECATED_API
        /* if the operation was a cancel, then return -DLM_ECANCEL, if a
           timeout caused the cancel then return -ETIMEDOUT */
        if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
                lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
                rv = -ETIMEDOUT;
        }
#endif

        if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
                lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
                rv = -EDEADLK;
        }

        dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
}

static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
        queue_cast(r, lkb,
                   is_overlap_unlock(lkb) ? -DLM_EUNLOCK : -DLM_ECANCEL);
}

static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
{
        if (is_master_copy(lkb)) {
                send_bast(r, lkb, rqmode);
        } else {
                dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
        }
}

/*
 * Basic operations on rsb's and lkb's
 */

/* This is only called to add a reference when the code already holds
   a valid reference to the rsb, so there's no need for locking. */

static inline void hold_rsb(struct dlm_rsb *r)
{
        kref_get(&r->res_ref);
}

void dlm_hold_rsb(struct dlm_rsb *r)
{
        hold_rsb(r);
}

/* When all references to the rsb are gone it's transferred to
   the tossed list for later disposal. */

static void put_rsb(struct dlm_rsb *r)
{
        struct dlm_ls *ls = r->res_ls;
        uint32_t bucket = r->res_bucket;
        int rv;

        rv = kref_put_lock(&r->res_ref, toss_rsb,
                           &ls->ls_rsbtbl[bucket].lock);
        if (rv)
                spin_unlock(&ls->ls_rsbtbl[bucket].lock);
}

void dlm_put_rsb(struct dlm_rsb *r)
{
        put_rsb(r);
}

static int pre_rsb_struct(struct dlm_ls *ls)
{
        struct dlm_rsb *r1, *r2;
        int count = 0;

        spin_lock(&ls->ls_new_rsb_spin);
        if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
                spin_unlock(&ls->ls_new_rsb_spin);
                return 0;
        }
        spin_unlock(&ls->ls_new_rsb_spin);

        r1 = dlm_allocate_rsb(ls);
        r2 = dlm_allocate_rsb(ls);

        spin_lock(&ls->ls_new_rsb_spin);
        if (r1) {
                list_add(&r1->res_hashchain, &ls->ls_new_rsb);
                ls->ls_new_rsb_count++;
        }
        if (r2) {
                list_add(&r2->res_hashchain, &ls->ls_new_rsb);
                ls->ls_new_rsb_count++;
        }
        count = ls->ls_new_rsb_count;
        spin_unlock(&ls->ls_new_rsb_spin);

        if (!count)
                return -ENOMEM;
        return 0;
}
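
/* The pre_rsb_struct()/get_rsb_struct() split exists so that rsb memory
 * is allocated (possibly sleeping) before the rsbtbl spinlock is taken,
 * and only consumed under the lock.  A minimal sketch of the caller
 * pattern used by the find_rsb_*() functions below:
 *
 *  retry:
 *      error = pre_rsb_struct(ls);             // refill the cache
 *      ...
 *      spin_lock(&ls->ls_rsbtbl[b].lock);
 *      ...
 *      error = get_rsb_struct(ls, name, len, &r);
 *      if (error == -EAGAIN) {                 // cache ran dry, retry
 *              spin_unlock(&ls->ls_rsbtbl[b].lock);
 *              goto retry;
 *      }
 */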

/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
   unlock any spinlocks, go back and call pre_rsb_struct again.
   Otherwise, take an rsb off the list and return it. */

static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
                          struct dlm_rsb **r_ret)
{
        struct dlm_rsb *r;
        int count;

        spin_lock(&ls->ls_new_rsb_spin);
        if (list_empty(&ls->ls_new_rsb)) {
                count = ls->ls_new_rsb_count;
                spin_unlock(&ls->ls_new_rsb_spin);
                log_debug(ls, "find_rsb retry %d %d %s",
                          count, dlm_config.ci_new_rsb_count, name);
                return -EAGAIN;
        }

        r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
        list_del(&r->res_hashchain);
        /* Convert the empty list_head to a NULL rb_node for tree usage: */
        memset(&r->res_hashnode, 0, sizeof(struct rb_node));
        ls->ls_new_rsb_count--;
        spin_unlock(&ls->ls_new_rsb_spin);

        r->res_ls = ls;
        r->res_length = len;
        memcpy(r->res_name, name, len);
        mutex_init(&r->res_mutex);

        INIT_LIST_HEAD(&r->res_lookup);
        INIT_LIST_HEAD(&r->res_grantqueue);
        INIT_LIST_HEAD(&r->res_convertqueue);
        INIT_LIST_HEAD(&r->res_waitqueue);
        INIT_LIST_HEAD(&r->res_root_list);
        INIT_LIST_HEAD(&r->res_recover_list);

        *r_ret = r;
        return 0;
}

static int rsb_cmp(struct dlm_rsb *r, const char *name, int nlen)
{
        char maxname[DLM_RESNAME_MAXLEN];

        memset(maxname, 0, DLM_RESNAME_MAXLEN);
        memcpy(maxname, name, nlen);
        return memcmp(r->res_name, maxname, DLM_RESNAME_MAXLEN);
}
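
/* Note: zero-padding the lookup name to DLM_RESNAME_MAXLEN before the
 * memcmp() gives a total order over fixed-width keys, so names of
 * different lengths compare consistently; e.g. "foo" (padded) sorts
 * before "foo1" because the padding byte 0 is less than '1'.
 */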

int dlm_search_rsb_tree(struct rb_root *tree, char *name, int len,
                        struct dlm_rsb **r_ret)
{
        struct rb_node *node = tree->rb_node;
        struct dlm_rsb *r;
        int rc;

        while (node) {
                r = rb_entry(node, struct dlm_rsb, res_hashnode);
                rc = rsb_cmp(r, name, len);
                if (rc < 0)
                        node = node->rb_left;
                else if (rc > 0)
                        node = node->rb_right;
                else
                        goto found;
        }
        *r_ret = NULL;
        return -EBADR;

 found:
        *r_ret = r;
        return 0;
}

static int rsb_insert(struct dlm_rsb *rsb, struct rb_root *tree)
{
        struct rb_node **newn = &tree->rb_node;
        struct rb_node *parent = NULL;
        int rc;

        while (*newn) {
                struct dlm_rsb *cur = rb_entry(*newn, struct dlm_rsb,
                                               res_hashnode);

                parent = *newn;
                rc = rsb_cmp(cur, rsb->res_name, rsb->res_length);
                if (rc < 0)
                        newn = &parent->rb_left;
                else if (rc > 0)
                        newn = &parent->rb_right;
                else {
                        log_print("rsb_insert match");
                        dlm_dump_rsb(rsb);
                        dlm_dump_rsb(cur);
                        return -EEXIST;
                }
        }

        rb_link_node(&rsb->res_hashnode, parent, newn);
        rb_insert_color(&rsb->res_hashnode, tree);
        return 0;
}

/*
 * Find rsb in rsbtbl and potentially create/add one
 *
 * Delaying the release of rsb's has a similar benefit to applications keeping
 * NL locks on an rsb, but without the guarantee that the cached master value
 * will still be valid when the rsb is reused.  Apps aren't always smart enough
 * to keep NL locks on an rsb that they may lock again shortly; this can lead
 * to excessive master lookups and removals if we don't delay the release.
 *
 * Searching for an rsb means looking through both the normal list and toss
 * list.  When found on the toss list the rsb is moved to the normal list with
 * ref count of 1; when found on normal list the ref count is incremented.
 *
 * rsb's on the keep list are being used locally and refcounted.
 * rsb's on the toss list are not being used locally, and are not refcounted.
 *
 * The toss list rsb's were either
 * - previously used locally but not any more (were on keep list, then
 *   moved to toss list when last refcount dropped)
 * - created and put on toss list as a directory record for a lookup
 *   (we are the dir node for the res, but are not using the res right now,
 *   but some other node is)
 *
 * The purpose of find_rsb() is to return a refcounted rsb for local use.
 * So, if the given rsb is on the toss list, it is moved to the keep list
 * before being returned.
 *
 * toss_rsb() happens when all local usage of the rsb is done, i.e. no
 * more refcounts exist, so the rsb is moved from the keep list to the
 * toss list.
 *
 * rsb's on both keep and toss lists are used for doing name to master
 * lookups.  rsb's that are in use locally (and being refcounted) are on
 * the keep list, rsb's that are not in use locally (not refcounted) and
 * only exist for name/master lookups are on the toss list.
 *
 * rsb's on the toss list whose dir_nodeid is not local can have stale
 * name/master mappings.  So, remote requests on such rsb's can potentially
 * return with an error, which means the mapping is stale and needs to
 * be updated with a new lookup.  (The idea behind MASTER UNCERTAIN and
 * first_lkid is to keep only a single outstanding request on an rsb
 * while that rsb has a potentially stale master.)
 */

static int find_rsb_dir(struct dlm_ls *ls, char *name, int len,
                        uint32_t hash, uint32_t b,
                        int dir_nodeid, int from_nodeid,
                        unsigned int flags, struct dlm_rsb **r_ret)
{
        struct dlm_rsb *r = NULL;
        int our_nodeid = dlm_our_nodeid();
        int from_local = 0;
        int from_other = 0;
        int from_dir = 0;
        int create = 0;
        int error;

        if (flags & R_RECEIVE_REQUEST) {
                if (from_nodeid == dir_nodeid)
                        from_dir = 1;
                else
                        from_other = 1;
        } else if (flags & R_REQUEST) {
                from_local = 1;
        }

        /*
         * flags & R_RECEIVE_RECOVER is from dlm_recover_master_copy, so
         * from_nodeid has sent us a lock in dlm_recover_locks, believing
         * we're the new master.  Our local recovery may not have set
         * res_master_nodeid to our_nodeid yet, so allow either.  Don't
         * create the rsb; dlm_recover_process_copy() will handle EBADR
         * by resending.
         *
         * If someone sends us a request, we are the dir node, and we do
         * not find the rsb anywhere, then recreate it.  This happens if
         * someone sends us a request after we have removed/freed an rsb
         * from our toss list.  (They sent a request instead of lookup
         * because they are using an rsb from their toss list.)
         */

        if (from_local || from_dir ||
            (from_other && (dir_nodeid == our_nodeid))) {
                create = 1;
        }

 retry:
        if (create) {
                error = pre_rsb_struct(ls);
                if (error < 0)
                        goto out;
        }

        spin_lock(&ls->ls_rsbtbl[b].lock);

        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
        if (error)
                goto do_toss;

        /*
         * rsb is active, so we can't check master_nodeid without lock_rsb.
         */

        kref_get(&r->res_ref);
        goto out_unlock;


 do_toss:
        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
        if (error)
                goto do_new;

        /*
         * rsb found inactive (master_nodeid may be out of date unless
         * we are the dir_nodeid or were the master)  No other thread
         * is using this rsb because it's on the toss list, so we can
         * look at or update res_master_nodeid without lock_rsb.
         */

        if ((r->res_master_nodeid != our_nodeid) && from_other) {
                /* our rsb was not master, and another node (not the dir node)
                   has sent us a request */
                log_debug(ls, "find_rsb toss from_other %d master %d dir %d %s",
                          from_nodeid, r->res_master_nodeid, dir_nodeid,
                          r->res_name);
                error = -ENOTBLK;
                goto out_unlock;
        }

        if ((r->res_master_nodeid != our_nodeid) && from_dir) {
                /* don't think this should ever happen */
                log_error(ls, "find_rsb toss from_dir %d master %d",
                          from_nodeid, r->res_master_nodeid);
                dlm_print_rsb(r);
                /* fix it and go on */
                r->res_master_nodeid = our_nodeid;
                r->res_nodeid = 0;
                rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
                r->res_first_lkid = 0;
        }

        if (from_local && (r->res_master_nodeid != our_nodeid)) {
                /* Because we have held no locks on this rsb,
                   res_master_nodeid could have become stale. */
                rsb_set_flag(r, RSB_MASTER_UNCERTAIN);
                r->res_first_lkid = 0;
        }

        rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
        error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
        goto out_unlock;


 do_new:
        /*
         * rsb not found
         */

        if (error == -EBADR && !create)
                goto out_unlock;

        error = get_rsb_struct(ls, name, len, &r);
        if (error == -EAGAIN) {
                spin_unlock(&ls->ls_rsbtbl[b].lock);
                goto retry;
        }
        if (error)
                goto out_unlock;

        r->res_hash = hash;
        r->res_bucket = b;
        r->res_dir_nodeid = dir_nodeid;
        kref_init(&r->res_ref);

        if (from_dir) {
                /* want to see how often this happens */
                log_debug(ls, "find_rsb new from_dir %d recreate %s",
                          from_nodeid, r->res_name);
                r->res_master_nodeid = our_nodeid;
                r->res_nodeid = 0;
                goto out_add;
        }

        if (from_other && (dir_nodeid != our_nodeid)) {
                /* should never happen */
                log_error(ls, "find_rsb new from_other %d dir %d our %d %s",
                          from_nodeid, dir_nodeid, our_nodeid, r->res_name);
                dlm_free_rsb(r);
                r = NULL;
                error = -ENOTBLK;
                goto out_unlock;
        }

        if (from_other) {
                log_debug(ls, "find_rsb new from_other %d dir %d %s",
                          from_nodeid, dir_nodeid, r->res_name);
        }

        if (dir_nodeid == our_nodeid) {
                /* When we are the dir nodeid, we can set the master
                   node immediately */
                r->res_master_nodeid = our_nodeid;
                r->res_nodeid = 0;
        } else {
                /* set_master will send_lookup to dir_nodeid */
                r->res_master_nodeid = 0;
                r->res_nodeid = -1;
        }

 out_add:
        error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
 out_unlock:
        spin_unlock(&ls->ls_rsbtbl[b].lock);
 out:
        *r_ret = r;
        return error;
}

/* During recovery, other nodes can send us new MSTCPY locks (from
   dlm_recover_locks) before we've made ourself master (in
   dlm_recover_masters). */

static int find_rsb_nodir(struct dlm_ls *ls, char *name, int len,
                          uint32_t hash, uint32_t b,
                          int dir_nodeid, int from_nodeid,
                          unsigned int flags, struct dlm_rsb **r_ret)
{
        struct dlm_rsb *r = NULL;
        int our_nodeid = dlm_our_nodeid();
        int recover = (flags & R_RECEIVE_RECOVER);
        int error;

 retry:
        error = pre_rsb_struct(ls);
        if (error < 0)
                goto out;

        spin_lock(&ls->ls_rsbtbl[b].lock);

        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
        if (error)
                goto do_toss;

        /*
         * rsb is active, so we can't check master_nodeid without lock_rsb.
         */

        kref_get(&r->res_ref);
        goto out_unlock;


 do_toss:
        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
        if (error)
                goto do_new;

        /*
         * rsb found inactive.  No other thread is using this rsb because
         * it's on the toss list, so we can look at or update
         * res_master_nodeid without lock_rsb.
         */

        if (!recover && (r->res_master_nodeid != our_nodeid) && from_nodeid) {
                /* our rsb is not master, and another node has sent us a
                   request; this should never happen */
                log_error(ls, "find_rsb toss from_nodeid %d master %d dir %d",
                          from_nodeid, r->res_master_nodeid, dir_nodeid);
                dlm_print_rsb(r);
                error = -ENOTBLK;
                goto out_unlock;
        }

        if (!recover && (r->res_master_nodeid != our_nodeid) &&
            (dir_nodeid == our_nodeid)) {
                /* our rsb is not master, and we are dir; may as well fix it;
                   this should never happen */
                log_error(ls, "find_rsb toss our %d master %d dir %d",
                          our_nodeid, r->res_master_nodeid, dir_nodeid);
                dlm_print_rsb(r);
                r->res_master_nodeid = our_nodeid;
                r->res_nodeid = 0;
        }

        rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
        error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
        goto out_unlock;


 do_new:
        /*
         * rsb not found
         */

        error = get_rsb_struct(ls, name, len, &r);
        if (error == -EAGAIN) {
                spin_unlock(&ls->ls_rsbtbl[b].lock);
                goto retry;
        }
        if (error)
                goto out_unlock;

        r->res_hash = hash;
        r->res_bucket = b;
        r->res_dir_nodeid = dir_nodeid;
        r->res_master_nodeid = dir_nodeid;
        r->res_nodeid = (dir_nodeid == our_nodeid) ? 0 : dir_nodeid;
        kref_init(&r->res_ref);

        error = rsb_insert(r, &ls->ls_rsbtbl[b].keep);
 out_unlock:
        spin_unlock(&ls->ls_rsbtbl[b].lock);
 out:
        *r_ret = r;
        return error;
}

static int find_rsb(struct dlm_ls *ls, char *name, int len, int from_nodeid,
                    unsigned int flags, struct dlm_rsb **r_ret)
{
        uint32_t hash, b;
        int dir_nodeid;

        if (len > DLM_RESNAME_MAXLEN)
                return -EINVAL;

        hash = jhash(name, len, 0);
        b = hash & (ls->ls_rsbtbl_size - 1);

        dir_nodeid = dlm_hash2nodeid(ls, hash);

        if (dlm_no_directory(ls))
                return find_rsb_nodir(ls, name, len, hash, b, dir_nodeid,
                                      from_nodeid, flags, r_ret);
        else
                return find_rsb_dir(ls, name, len, hash, b, dir_nodeid,
                                    from_nodeid, flags, r_ret);
}
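
/* Note on the bucket math above: "hash & (ls->ls_rsbtbl_size - 1)" is
 * equivalent to "hash % ls->ls_rsbtbl_size" only when the table size is
 * a power of two, which this masking assumes; e.g. with 1024 buckets the
 * mask is 0x3ff and hash 0x12345 maps to bucket 0x345.
 */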

/* we have received a request and found that res_master_nodeid != our_nodeid,
   so we need to return an error or make ourself the master */

static int validate_master_nodeid(struct dlm_ls *ls, struct dlm_rsb *r,
                                  int from_nodeid)
{
        if (dlm_no_directory(ls)) {
                log_error(ls, "find_rsb keep from_nodeid %d master %d dir %d",
                          from_nodeid, r->res_master_nodeid,
                          r->res_dir_nodeid);
                dlm_print_rsb(r);
                return -ENOTBLK;
        }

        if (from_nodeid != r->res_dir_nodeid) {
                /* our rsb is not master, and another node (not the dir node)
                   has sent us a request.  this is much more common when our
                   master_nodeid is zero, so limit debug to non-zero.  */

                if (r->res_master_nodeid) {
                        log_debug(ls, "validate master from_other %d master %d "
                                  "dir %d first %x %s", from_nodeid,
                                  r->res_master_nodeid, r->res_dir_nodeid,
                                  r->res_first_lkid, r->res_name);
                }
                return -ENOTBLK;
        } else {
                /* our rsb is not master, but the dir nodeid has sent us a
                   request; this could happen with master 0 / res_nodeid -1 */

                if (r->res_master_nodeid) {
                        log_error(ls, "validate master from_dir %d master %d "
                                  "first %x %s",
                                  from_nodeid, r->res_master_nodeid,
                                  r->res_first_lkid, r->res_name);
                }

                r->res_master_nodeid = dlm_our_nodeid();
                r->res_nodeid = 0;
                return 0;
        }
}

static void __dlm_master_lookup(struct dlm_ls *ls, struct dlm_rsb *r, int our_nodeid,
                                int from_nodeid, bool toss_list, unsigned int flags,
                                int *r_nodeid, int *result)
{
        int fix_master = (flags & DLM_LU_RECOVER_MASTER);
        int from_master = (flags & DLM_LU_RECOVER_DIR);

        if (r->res_dir_nodeid != our_nodeid) {
                /* should not happen, but may as well fix it and carry on */
                log_error(ls, "%s res_dir %d our %d %s", __func__,
                          r->res_dir_nodeid, our_nodeid, r->res_name);
                r->res_dir_nodeid = our_nodeid;
        }

        if (fix_master && dlm_is_removed(ls, r->res_master_nodeid)) {
                /* Recovery uses this function to set a new master when
                 * the previous master failed.  Setting NEW_MASTER will
                 * force dlm_recover_masters to call recover_master on this
                 * rsb even though the res_nodeid is no longer removed.
                 */

                r->res_master_nodeid = from_nodeid;
                r->res_nodeid = from_nodeid;
                rsb_set_flag(r, RSB_NEW_MASTER);

                if (toss_list) {
                        /* I don't think we should ever find it on toss list. */
                        log_error(ls, "%s fix_master on toss", __func__);
                        dlm_dump_rsb(r);
                }
        }

        if (from_master && (r->res_master_nodeid != from_nodeid)) {
                /* this will happen if from_nodeid became master during
                 * a previous recovery cycle, and we aborted the previous
                 * cycle before recovering this master value
                 */

                log_limit(ls, "%s from_master %d master_nodeid %d res_nodeid %d first %x %s",
                          __func__, from_nodeid, r->res_master_nodeid,
                          r->res_nodeid, r->res_first_lkid, r->res_name);

                if (r->res_master_nodeid == our_nodeid) {
                        log_error(ls, "from_master %d our_master", from_nodeid);
                        dlm_dump_rsb(r);
                        goto ret_assign;
                }

                r->res_master_nodeid = from_nodeid;
                r->res_nodeid = from_nodeid;
                rsb_set_flag(r, RSB_NEW_MASTER);
        }

        if (!r->res_master_nodeid) {
                /* this will happen if recovery happens while we're looking
                 * up the master for this rsb
                 */

                log_debug(ls, "%s master 0 to %d first %x %s", __func__,
                          from_nodeid, r->res_first_lkid, r->res_name);
                r->res_master_nodeid = from_nodeid;
                r->res_nodeid = from_nodeid;
        }

        if (!from_master && !fix_master &&
            (r->res_master_nodeid == from_nodeid)) {
                /* this can happen when the master sends remove, the dir node
                 * finds the rsb on the keep list and ignores the remove,
                 * and the former master sends a lookup
                 */

                log_limit(ls, "%s from master %d flags %x first %x %s",
                          __func__, from_nodeid, flags, r->res_first_lkid,
                          r->res_name);
        }

 ret_assign:
        *r_nodeid = r->res_master_nodeid;
        if (result)
                *result = DLM_LU_MATCH;
}

/*
 * We're the dir node for this res and another node wants to know the
 * master nodeid.  During normal operation (non recovery) this is only
 * called from receive_lookup(); master lookups when the local node is
 * the dir node are done by find_rsb().
 *
 * normal operation, we are the dir node for a resource
 * . _request_lock
 * . set_master
 * . send_lookup
 * . receive_lookup
 * . dlm_master_lookup flags 0
 *
 * recover directory, we are rebuilding dir for all resources
 * . dlm_recover_directory
 * . dlm_rcom_names
 *   remote node sends back the rsb names it is master of and we are dir of
 * . dlm_master_lookup RECOVER_DIR (fix_master 0, from_master 1)
 *   we either create new rsb setting remote node as master, or find existing
 *   rsb and set master to be the remote node.
 *
 * recover masters, we are finding the new master for resources
 * . dlm_recover_masters
 * . recover_master
 * . dlm_send_rcom_lookup
 * . receive_rcom_lookup
 * . dlm_master_lookup RECOVER_MASTER (fix_master 1, from_master 0)
 */

int dlm_master_lookup(struct dlm_ls *ls, int from_nodeid, char *name, int len,
                      unsigned int flags, int *r_nodeid, int *result)
{
        struct dlm_rsb *r = NULL;
        uint32_t hash, b;
        int our_nodeid = dlm_our_nodeid();
        int dir_nodeid, error;

        if (len > DLM_RESNAME_MAXLEN)
                return -EINVAL;

        if (from_nodeid == our_nodeid) {
                log_error(ls, "dlm_master_lookup from our_nodeid %d flags %x",
                          our_nodeid, flags);
                return -EINVAL;
        }

        hash = jhash(name, len, 0);
        b = hash & (ls->ls_rsbtbl_size - 1);

        dir_nodeid = dlm_hash2nodeid(ls, hash);
        if (dir_nodeid != our_nodeid) {
                log_error(ls, "dlm_master_lookup from %d dir %d our %d h %x %d",
                          from_nodeid, dir_nodeid, our_nodeid, hash,
                          ls->ls_num_nodes);
                *r_nodeid = -1;
                return -EINVAL;
        }

 retry:
        error = pre_rsb_struct(ls);
        if (error < 0)
                return error;

        spin_lock(&ls->ls_rsbtbl[b].lock);
        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
        if (!error) {
                /* because the rsb is active, we need to lock_rsb before
                 * checking/changing res_master_nodeid
                 */

                hold_rsb(r);
                spin_unlock(&ls->ls_rsbtbl[b].lock);
                lock_rsb(r);

                __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, false,
                                    flags, r_nodeid, result);

                /* the rsb was active */
                unlock_rsb(r);
                put_rsb(r);

                return 0;
        }

        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
        if (error)
                goto not_found;

        /* because the rsb is inactive (on toss list), it's not refcounted
         * and lock_rsb is not used, but is protected by the rsbtbl lock
         */

        __dlm_master_lookup(ls, r, our_nodeid, from_nodeid, true, flags,
                            r_nodeid, result);

        r->res_toss_time = jiffies;
        /* the rsb was inactive (on toss list) */
        spin_unlock(&ls->ls_rsbtbl[b].lock);

        return 0;

 not_found:
        error = get_rsb_struct(ls, name, len, &r);
        if (error == -EAGAIN) {
                spin_unlock(&ls->ls_rsbtbl[b].lock);
                goto retry;
        }
        if (error)
                goto out_unlock;

        r->res_hash = hash;
        r->res_bucket = b;
        r->res_dir_nodeid = our_nodeid;
        r->res_master_nodeid = from_nodeid;
        r->res_nodeid = from_nodeid;
        kref_init(&r->res_ref);
        r->res_toss_time = jiffies;

        error = rsb_insert(r, &ls->ls_rsbtbl[b].toss);
        if (error) {
                /* should never happen */
                dlm_free_rsb(r);
                spin_unlock(&ls->ls_rsbtbl[b].lock);
                goto retry;
        }

        if (result)
                *result = DLM_LU_ADD;
        *r_nodeid = from_nodeid;
 out_unlock:
        spin_unlock(&ls->ls_rsbtbl[b].lock);
        return error;
}
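
/* Result semantics (as used above): on success *r_nodeid holds the
 * master's nodeid, and *result (when the caller passes one) is
 * DLM_LU_MATCH if an existing rsb supplied the answer, or DLM_LU_ADD if
 * a new directory record was created naming from_nodeid as master.
 */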

static void dlm_dump_rsb_hash(struct dlm_ls *ls, uint32_t hash)
{
        struct rb_node *n;
        struct dlm_rsb *r;
        int i;

        for (i = 0; i < ls->ls_rsbtbl_size; i++) {
                spin_lock(&ls->ls_rsbtbl[i].lock);
                for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
                        r = rb_entry(n, struct dlm_rsb, res_hashnode);
                        if (r->res_hash == hash)
                                dlm_dump_rsb(r);
                }
                spin_unlock(&ls->ls_rsbtbl[i].lock);
        }
}

void dlm_dump_rsb_name(struct dlm_ls *ls, char *name, int len)
{
        struct dlm_rsb *r = NULL;
        uint32_t hash, b;
        int error;

        hash = jhash(name, len, 0);
        b = hash & (ls->ls_rsbtbl_size - 1);

        spin_lock(&ls->ls_rsbtbl[b].lock);
        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
        if (!error)
                goto out_dump;

        error = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
        if (error)
                goto out;
 out_dump:
        dlm_dump_rsb(r);
 out:
        spin_unlock(&ls->ls_rsbtbl[b].lock);
}

static void toss_rsb(struct kref *kref)
{
        struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);
        struct dlm_ls *ls = r->res_ls;

        DLM_ASSERT(list_empty(&r->res_root_list), dlm_print_rsb(r););
        kref_init(&r->res_ref);
        rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[r->res_bucket].keep);
        rsb_insert(r, &ls->ls_rsbtbl[r->res_bucket].toss);
        r->res_toss_time = jiffies;
        ls->ls_rsbtbl[r->res_bucket].flags |= DLM_RTF_SHRINK;
        if (r->res_lvbptr) {
                dlm_free_lvb(r->res_lvbptr);
                r->res_lvbptr = NULL;
        }
}

/* See comment for unhold_lkb */

static void unhold_rsb(struct dlm_rsb *r)
{
        int rv;
        rv = kref_put(&r->res_ref, toss_rsb);
        DLM_ASSERT(!rv, dlm_dump_rsb(r););
}

static void kill_rsb(struct kref *kref)
{
        struct dlm_rsb *r = container_of(kref, struct dlm_rsb, res_ref);

        /* All work is done after the return from kref_put() so we
           can release the write_lock before the remove and free. */

        DLM_ASSERT(list_empty(&r->res_lookup), dlm_dump_rsb(r););
        DLM_ASSERT(list_empty(&r->res_grantqueue), dlm_dump_rsb(r););
        DLM_ASSERT(list_empty(&r->res_convertqueue), dlm_dump_rsb(r););
        DLM_ASSERT(list_empty(&r->res_waitqueue), dlm_dump_rsb(r););
        DLM_ASSERT(list_empty(&r->res_root_list), dlm_dump_rsb(r););
        DLM_ASSERT(list_empty(&r->res_recover_list), dlm_dump_rsb(r););
}

/* Attaching/detaching lkb's from rsb's is for rsb reference counting.
   The rsb must exist as long as any lkb's for it do. */

static void attach_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
        hold_rsb(r);
        lkb->lkb_resource = r;
}

static void detach_lkb(struct dlm_lkb *lkb)
{
        if (lkb->lkb_resource) {
                put_rsb(lkb->lkb_resource);
                lkb->lkb_resource = NULL;
        }
}

static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
                       int start, int end)
{
        struct dlm_lkb *lkb;
        int rv;

        lkb = dlm_allocate_lkb(ls);
        if (!lkb)
                return -ENOMEM;

        lkb->lkb_nodeid = -1;
        lkb->lkb_grmode = DLM_LOCK_IV;
        kref_init(&lkb->lkb_ref);
        INIT_LIST_HEAD(&lkb->lkb_ownqueue);
        INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
#ifdef CONFIG_DLM_DEPRECATED_API
        INIT_LIST_HEAD(&lkb->lkb_time_list);
#endif
        INIT_LIST_HEAD(&lkb->lkb_cb_list);
        mutex_init(&lkb->lkb_cb_mutex);
        INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);

        idr_preload(GFP_NOFS);
        spin_lock(&ls->ls_lkbidr_spin);
        rv = idr_alloc(&ls->ls_lkbidr, lkb, start, end, GFP_NOWAIT);
        if (rv >= 0)
                lkb->lkb_id = rv;
        spin_unlock(&ls->ls_lkbidr_spin);
        idr_preload_end();

        if (rv < 0) {
                log_error(ls, "create_lkb idr error %d", rv);
                dlm_free_lkb(lkb);
                return rv;
        }

        *lkb_ret = lkb;
        return 0;
}

static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
{
        return _create_lkb(ls, lkb_ret, 1, 0);
}
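
/* Note: in idr_alloc() an "end" of 0 means no upper bound, so
 * create_lkb() above asks for any free lkb id >= 1; starting at 1 skips
 * id 0, which can then be treated as an invalid lkid.
 */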

static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
{
        struct dlm_lkb *lkb;

        spin_lock(&ls->ls_lkbidr_spin);
        lkb = idr_find(&ls->ls_lkbidr, lkid);
        if (lkb)
                kref_get(&lkb->lkb_ref);
        spin_unlock(&ls->ls_lkbidr_spin);

        *lkb_ret = lkb;
        return lkb ? 0 : -ENOENT;
}

static void kill_lkb(struct kref *kref)
{
        struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);

        /* All work is done after the return from kref_put() so we
           can release the write_lock before the detach_lkb */

        DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););
}

/* __put_lkb() is used when an lkb may not have an rsb attached to
   it so we need to provide the lockspace explicitly */

static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
        uint32_t lkid = lkb->lkb_id;
        int rv;

        rv = kref_put_lock(&lkb->lkb_ref, kill_lkb,
                           &ls->ls_lkbidr_spin);
        if (rv) {
                idr_remove(&ls->ls_lkbidr, lkid);
                spin_unlock(&ls->ls_lkbidr_spin);

                detach_lkb(lkb);

                /* for local/process lkbs, lvbptr points to caller's lksb */
                if (lkb->lkb_lvbptr && is_master_copy(lkb))
                        dlm_free_lvb(lkb->lkb_lvbptr);
                dlm_free_lkb(lkb);
        }

        return rv;
}

int dlm_put_lkb(struct dlm_lkb *lkb)
{
        struct dlm_ls *ls;

        DLM_ASSERT(lkb->lkb_resource, dlm_print_lkb(lkb););
        DLM_ASSERT(lkb->lkb_resource->res_ls, dlm_print_lkb(lkb););

        ls = lkb->lkb_resource->res_ls;
        return __put_lkb(ls, lkb);
}

/* This is only called to add a reference when the code already holds
   a valid reference to the lkb, so there's no need for locking. */

static inline void hold_lkb(struct dlm_lkb *lkb)
{
        kref_get(&lkb->lkb_ref);
}

static void unhold_lkb_assert(struct kref *kref)
{
        struct dlm_lkb *lkb = container_of(kref, struct dlm_lkb, lkb_ref);

        DLM_ASSERT(false, dlm_print_lkb(lkb););
}

/* This is called when we need to remove a reference and are certain
   it's not the last ref.  e.g. del_lkb is always called between a
   find_lkb/put_lkb and is always the inverse of a previous add_lkb.
   put_lkb would work fine, but would involve unnecessary locking */

static inline void unhold_lkb(struct dlm_lkb *lkb)
{
        kref_put(&lkb->lkb_ref, unhold_lkb_assert);
}

static void lkb_add_ordered(struct list_head *new, struct list_head *head,
                            int mode)
{
        struct dlm_lkb *lkb = NULL, *iter;

        list_for_each_entry(iter, head, lkb_statequeue)
                if (iter->lkb_rqmode < mode) {
                        lkb = iter;
                        list_add_tail(new, &iter->lkb_statequeue);
                        break;
                }

        if (!lkb)
                list_add_tail(new, head);
}
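
/* The effect (illustrative): the queue is kept in descending mode order.
 * Inserting a PR (mode 4) entry into a queue ordered EX(6), CW(3), NL(1)
 * stops at the first entry with a lower mode (CW) and places the new
 * entry before it, giving EX, PR, CW, NL.
 */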

/* add/remove lkb to rsb's grant/convert/wait queue */

static void add_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int status)
{
        kref_get(&lkb->lkb_ref);

        DLM_ASSERT(!lkb->lkb_status, dlm_print_lkb(lkb););

        lkb->lkb_timestamp = ktime_get();

        lkb->lkb_status = status;

        switch (status) {
        case DLM_LKSTS_WAITING:
                if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
                        list_add(&lkb->lkb_statequeue, &r->res_waitqueue);
                else
                        list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue);
                break;
        case DLM_LKSTS_GRANTED:
                /* convention says granted locks kept in order of grmode */
                lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue,
                                lkb->lkb_grmode);
                break;
        case DLM_LKSTS_CONVERT:
                if (lkb->lkb_exflags & DLM_LKF_HEADQUE)
                        list_add(&lkb->lkb_statequeue, &r->res_convertqueue);
                else
                        list_add_tail(&lkb->lkb_statequeue,
                                      &r->res_convertqueue);
                break;
        default:
                DLM_ASSERT(0, dlm_print_lkb(lkb); printk("sts=%d\n", status););
        }
}

static void del_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
        lkb->lkb_status = 0;
        list_del(&lkb->lkb_statequeue);
        unhold_lkb(lkb);
}

static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts)
{
        hold_lkb(lkb);
        del_lkb(r, lkb);
        add_lkb(r, lkb, sts);
        unhold_lkb(lkb);
}

static int msg_reply_type(int mstype)
{
        switch (mstype) {
        case DLM_MSG_REQUEST:
                return DLM_MSG_REQUEST_REPLY;
        case DLM_MSG_CONVERT:
                return DLM_MSG_CONVERT_REPLY;
        case DLM_MSG_UNLOCK:
                return DLM_MSG_UNLOCK_REPLY;
        case DLM_MSG_CANCEL:
                return DLM_MSG_CANCEL_REPLY;
        case DLM_MSG_LOOKUP:
                return DLM_MSG_LOOKUP_REPLY;
        }
        return -1;
}

/* add/remove lkb from global waiters list of lkb's waiting for
   a reply from a remote node */

static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
{
        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
        int error = 0;

        mutex_lock(&ls->ls_waiters_mutex);

        if (is_overlap_unlock(lkb) ||
            (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL))) {
                error = -EINVAL;
                goto out;
        }

        if (lkb->lkb_wait_type || is_overlap_cancel(lkb)) {
                switch (mstype) {
                case DLM_MSG_UNLOCK:
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
                        break;
                case DLM_MSG_CANCEL:
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
                        break;
                default:
                        error = -EBUSY;
                        goto out;
                }
                lkb->lkb_wait_count++;
                hold_lkb(lkb);

                log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
                          lkb->lkb_id, lkb->lkb_wait_type, mstype,
                          lkb->lkb_wait_count, lkb->lkb_flags);
                goto out;
        }

        DLM_ASSERT(!lkb->lkb_wait_count,
                   dlm_print_lkb(lkb);
                   printk("wait_count %d\n", lkb->lkb_wait_count););

        lkb->lkb_wait_count++;
        lkb->lkb_wait_type = mstype;
        lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
        hold_lkb(lkb);
        list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
 out:
        if (error)
                log_error(ls, "addwait error %x %d flags %x %d %d %s",
                          lkb->lkb_id, error, lkb->lkb_flags, mstype,
                          lkb->lkb_wait_type, lkb->lkb_resource->res_name);
        mutex_unlock(&ls->ls_waiters_mutex);
        return error;
}
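
/* Overlap example (illustrative): if an unlock is submitted while the
 * lkb still has a request outstanding (lkb_wait_type is set), the
 * DLM_MSG_UNLOCK case above marks the lkb DLM_IFL_OVERLAP_UNLOCK and
 * bumps lkb_wait_count instead of failing, so both replies can later be
 * accounted for in _remove_from_waiters().
 */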

/* We clear the RESEND flag because we might be taking an lkb off the waiters
   list as part of process_requestqueue (e.g. a lookup that has an optimized
   request reply on the requestqueue) between dlm_recover_waiters_pre() which
   set RESEND and dlm_recover_waiters_post() */

static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
                                struct dlm_message *ms)
{
        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
        int overlap_done = 0;

        if (is_overlap_unlock(lkb) && (mstype == DLM_MSG_UNLOCK_REPLY)) {
                log_debug(ls, "remwait %x unlock_reply overlap", lkb->lkb_id);
                lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
                overlap_done = 1;
                goto out_del;
        }

        if (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL_REPLY)) {
                log_debug(ls, "remwait %x cancel_reply overlap", lkb->lkb_id);
                lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
                overlap_done = 1;
                goto out_del;
        }

        /* Cancel state was preemptively cleared by a successful convert,
           see next comment, nothing to do. */

        if ((mstype == DLM_MSG_CANCEL_REPLY) &&
            (lkb->lkb_wait_type != DLM_MSG_CANCEL)) {
                log_debug(ls, "remwait %x cancel_reply wait_type %d",
                          lkb->lkb_id, lkb->lkb_wait_type);
                return -1;
        }

        /* Remove for the convert reply, and preemptively remove for the
           cancel reply.  A convert has been granted while there's still
           an outstanding cancel on it (the cancel is moot and the result
           in the cancel reply should be 0).  We preempt the cancel reply
           because the app gets the convert result and then can follow up
           with another op, like convert.  This subsequent op would see the
           lingering state of the cancel and fail with -EBUSY. */

        if ((mstype == DLM_MSG_CONVERT_REPLY) &&
            (lkb->lkb_wait_type == DLM_MSG_CONVERT) &&
            is_overlap_cancel(lkb) && ms && !ms->m_result) {
                log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
                          lkb->lkb_id);
                lkb->lkb_wait_type = 0;
                lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
                lkb->lkb_wait_count--;
                unhold_lkb(lkb);
                goto out_del;
        }

        /* N.B. type of reply may not always correspond to type of original
           msg due to lookup->request optimization, verify others? */

        if (lkb->lkb_wait_type) {
                lkb->lkb_wait_type = 0;
                goto out_del;
        }

        log_error(ls, "remwait error %x remote %d %x msg %d flags %x no wait",
                  lkb->lkb_id, ms ? le32_to_cpu(ms->m_header.h_nodeid) : 0,
                  lkb->lkb_remid, mstype, lkb->lkb_flags);
        return -1;

 out_del:
        /* the force-unlock/cancel has completed and we haven't recvd a reply
           to the op that was in progress prior to the unlock/cancel; we
           give up on any reply to the earlier op.  FIXME: not sure when/how
           this would happen */

        if (overlap_done && lkb->lkb_wait_type) {
                log_error(ls, "remwait error %x reply %d wait_type %d overlap",
                          lkb->lkb_id, mstype, lkb->lkb_wait_type);
                lkb->lkb_wait_count--;
                unhold_lkb(lkb);
                lkb->lkb_wait_type = 0;
        }

        DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb););

        lkb->lkb_flags &= ~DLM_IFL_RESEND;
        lkb->lkb_wait_count--;
        if (!lkb->lkb_wait_count)
                list_del_init(&lkb->lkb_wait_reply);
        unhold_lkb(lkb);
        return 0;
}

static int remove_from_waiters(struct dlm_lkb *lkb, int mstype)
{
        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
        int error;

        mutex_lock(&ls->ls_waiters_mutex);
        error = _remove_from_waiters(lkb, mstype, NULL);
        mutex_unlock(&ls->ls_waiters_mutex);
        return error;
}

/* Handles situations where we might be processing a "fake" or "stub" reply in
   which we can't try to take waiters_mutex again. */

static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
{
        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
        int error;

        if (ms->m_flags != cpu_to_le32(DLM_IFL_STUB_MS))
                mutex_lock(&ls->ls_waiters_mutex);
        error = _remove_from_waiters(lkb, le32_to_cpu(ms->m_type), ms);
        if (ms->m_flags != cpu_to_le32(DLM_IFL_STUB_MS))
                mutex_unlock(&ls->ls_waiters_mutex);
        return error;
}

/* If there's an rsb for the same resource being removed, ensure
 * that the remove message is sent before the new lookup message.
 */

#define DLM_WAIT_PENDING_COND(ls, r)            \
        (ls->ls_remove_len &&                   \
         !rsb_cmp(r, ls->ls_remove_name,        \
                  ls->ls_remove_len))

static void wait_pending_remove(struct dlm_rsb *r)
{
        struct dlm_ls *ls = r->res_ls;
 restart:
        spin_lock(&ls->ls_remove_spin);
        if (DLM_WAIT_PENDING_COND(ls, r)) {
                log_debug(ls, "delay lookup for remove dir %d %s",
                          r->res_dir_nodeid, r->res_name);
                spin_unlock(&ls->ls_remove_spin);
                wait_event(ls->ls_remove_wait, !DLM_WAIT_PENDING_COND(ls, r));
                goto restart;
        }
        spin_unlock(&ls->ls_remove_spin);
}
e7fd4179 1611
05c32f47
DT
1612/*
1613 * ls_remove_spin protects ls_remove_name and ls_remove_len which are
1614 * read by other threads in wait_pending_remove. ls_remove_names
1615 * and ls_remove_lens are only used by the scan thread, so they do
1616 * not need protection.
1617 */
c04fecb4 1618
05c32f47
DT
1619static void shrink_bucket(struct dlm_ls *ls, int b)
1620{
1621 struct rb_node *n, *next;
1622 struct dlm_rsb *r;
1623 char *name;
1624 int our_nodeid = dlm_our_nodeid();
1625 int remote_count = 0;
f1172283 1626 int need_shrink = 0;
05c32f47 1627 int i, len, rv;
c04fecb4 1628
05c32f47 1629 memset(&ls->ls_remove_lens, 0, sizeof(int) * DLM_REMOVE_NAMES_MAX);
c04fecb4 1630
05c32f47 1631 spin_lock(&ls->ls_rsbtbl[b].lock);
f1172283
DT
1632
1633 if (!(ls->ls_rsbtbl[b].flags & DLM_RTF_SHRINK)) {
1634 spin_unlock(&ls->ls_rsbtbl[b].lock);
1635 return;
1636 }
1637
05c32f47
DT
1638 for (n = rb_first(&ls->ls_rsbtbl[b].toss); n; n = next) {
1639 next = rb_next(n);
1640 r = rb_entry(n, struct dlm_rsb, res_hashnode);
1641
1642 /* If we're the directory record for this rsb, and
1643 we're not the master of it, then we need to wait
1644 for the master node to send us a dir remove for
1645 before removing the dir record. */
1646
1647 if (!dlm_no_directory(ls) &&
1648 (r->res_master_nodeid != our_nodeid) &&
1649 (dlm_dir_nodeid(r) == our_nodeid)) {
1650 continue;
e7fd4179
DT
1651 }
1652
f1172283
DT
1653 need_shrink = 1;
1654
05c32f47
DT
1655 if (!time_after_eq(jiffies, r->res_toss_time +
1656 dlm_config.ci_toss_secs * HZ)) {
1657 continue;
e7fd4179
DT
1658 }
1659
05c32f47
DT
1660 if (!dlm_no_directory(ls) &&
1661 (r->res_master_nodeid == our_nodeid) &&
1662 (dlm_dir_nodeid(r) != our_nodeid)) {
e7fd4179 1663
c04fecb4
DT
1664 /* We're the master of this rsb but we're not
1665 the directory record, so we need to tell the
1666 dir node to remove the dir record. */
1667
05c32f47
DT
1668 ls->ls_remove_lens[remote_count] = r->res_length;
1669 memcpy(ls->ls_remove_names[remote_count], r->res_name,
1670 DLM_RESNAME_MAXLEN);
1671 remote_count++;
c04fecb4 1672
05c32f47
DT
1673 if (remote_count >= DLM_REMOVE_NAMES_MAX)
1674 break;
1675 continue;
1676 }
1677
1678 if (!kref_put(&r->res_ref, kill_rsb)) {
e7fd4179 1679 log_error(ls, "tossed rsb in use %s", r->res_name);
05c32f47 1680 continue;
e7fd4179 1681 }
05c32f47
DT
1682
1683 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1684 dlm_free_rsb(r);
e7fd4179 1685 }
f1172283
DT
1686
1687 if (need_shrink)
1688 ls->ls_rsbtbl[b].flags |= DLM_RTF_SHRINK;
1689 else
1690 ls->ls_rsbtbl[b].flags &= ~DLM_RTF_SHRINK;
05c32f47 1691 spin_unlock(&ls->ls_rsbtbl[b].lock);
e7fd4179 1692
05c32f47
DT
1693 /*
1694 * While searching for rsb's to free, we found some that require
1695 * remote removal. We leave them in place and find them again here
1696 * so there is a very small gap between removing them from the toss
1697 * list and sending the removal. Keeping this gap small is
1698 * important to keep us (the master node) from being out of sync
1699 * with the remote dir node for very long.
1700 *
1701 * From the time the rsb is removed from toss until just after
1702 * send_remove, the rsb name is saved in ls_remove_name. A new
1703 * lookup checks this to ensure that a new lookup message for the
1704 * same resource name is not sent just before the remove message.
1705 */
1706
1707 for (i = 0; i < remote_count; i++) {
1708 name = ls->ls_remove_names[i];
1709 len = ls->ls_remove_lens[i];
1710
1711 spin_lock(&ls->ls_rsbtbl[b].lock);
1712 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
1713 if (rv) {
1714 spin_unlock(&ls->ls_rsbtbl[b].lock);
1715 log_debug(ls, "remove_name not toss %s", name);
1716 continue;
1717 }
1718
1719 if (r->res_master_nodeid != our_nodeid) {
1720 spin_unlock(&ls->ls_rsbtbl[b].lock);
1721 log_debug(ls, "remove_name master %d dir %d our %d %s",
1722 r->res_master_nodeid, r->res_dir_nodeid,
1723 our_nodeid, name);
1724 continue;
1725 }
1726
1727 if (r->res_dir_nodeid == our_nodeid) {
1728 /* should never happen */
1729 spin_unlock(&ls->ls_rsbtbl[b].lock);
1730 log_error(ls, "remove_name dir %d master %d our %d %s",
1731 r->res_dir_nodeid, r->res_master_nodeid,
1732 our_nodeid, name);
1733 continue;
1734 }
1735
1736 if (!time_after_eq(jiffies, r->res_toss_time +
1737 dlm_config.ci_toss_secs * HZ)) {
1738 spin_unlock(&ls->ls_rsbtbl[b].lock);
1739 log_debug(ls, "remove_name toss_time %lu now %lu %s",
1740 r->res_toss_time, jiffies, name);
1741 continue;
1742 }
1743
1744 if (!kref_put(&r->res_ref, kill_rsb)) {
1745 spin_unlock(&ls->ls_rsbtbl[b].lock);
1746 log_error(ls, "remove_name in use %s", name);
1747 continue;
1748 }
1749
1750 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
1751
1752 /* block lookup of same name until we've sent remove */
1753 spin_lock(&ls->ls_remove_spin);
1754 ls->ls_remove_len = len;
1755 memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
1756 spin_unlock(&ls->ls_remove_spin);
1757 spin_unlock(&ls->ls_rsbtbl[b].lock);
1758
1759 send_remove(r);
1760
1761 /* allow lookup of name again */
1762 spin_lock(&ls->ls_remove_spin);
1763 ls->ls_remove_len = 0;
1764 memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
1765 spin_unlock(&ls->ls_remove_spin);
1766 wake_up(&ls->ls_remove_wait);
1767
1768 dlm_free_rsb(r);
1769 }
1770}
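/*
 * Illustrative sketch (not part of this file): the lookup-side half of
 * the ls_remove_name handshake described in the comment above.  A lookup
 * for a name that is currently being removed waits on ls_remove_wait
 * until send_remove() has gone out; the helper name is hypothetical.
 */
#if 0
static void example_wait_remove_done(struct dlm_ls *ls,
				     const char *name, int len)
{
	while (1) {
		spin_lock(&ls->ls_remove_spin);
		if (ls->ls_remove_len != len ||
		    memcmp(ls->ls_remove_name, name, len)) {
			spin_unlock(&ls->ls_remove_spin);
			return;	/* no remove in flight for this name */
		}
		spin_unlock(&ls->ls_remove_spin);
		wait_event(ls->ls_remove_wait, !ls->ls_remove_len);
	}
}
#endif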
1771
1772void dlm_scan_rsbs(struct dlm_ls *ls)
1773{
1774 int i;
1775
1776 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
1777 shrink_bucket(ls, i);
1778 if (dlm_locking_stopped(ls))
1779 break;
1780 cond_resched();
1781 }
1782}
1783
1784#ifdef CONFIG_DLM_DEPRECATED_API
1785static void add_timeout(struct dlm_lkb *lkb)
1786{
1787 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1788
1789 if (is_master_copy(lkb))
1790 return;
1791
1792 if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
1793 !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
1794 lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
1795 goto add_it;
1796 }
1797 if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
1798 goto add_it;
1799 return;
1800
1801 add_it:
1802 DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
1803 mutex_lock(&ls->ls_timeout_mutex);
1804 hold_lkb(lkb);
1805 list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
1806 mutex_unlock(&ls->ls_timeout_mutex);
1807}
1808
1809static void del_timeout(struct dlm_lkb *lkb)
1810{
1811 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
1812
1813 mutex_lock(&ls->ls_timeout_mutex);
1814 if (!list_empty(&lkb->lkb_time_list)) {
1815 list_del_init(&lkb->lkb_time_list);
1816 unhold_lkb(lkb);
1817 }
1818 mutex_unlock(&ls->ls_timeout_mutex);
1819}
1820
1821/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
1822 lkb_timeout_cs without lock_rsb? Note: we can't lock timeout_mutex
1823 and then lock rsb because of lock ordering in add_timeout. We may need
1824 to specify some special timeout-related bits in the lkb that are just to
1825 be accessed under the timeout_mutex. */
1826
1827void dlm_scan_timeout(struct dlm_ls *ls)
1828{
1829 struct dlm_rsb *r;
1830 struct dlm_lkb *lkb = NULL, *iter;
1831 int do_cancel, do_warn;
1832 s64 wait_us;
1833
1834 for (;;) {
1835 if (dlm_locking_stopped(ls))
1836 break;
1837
1838 do_cancel = 0;
1839 do_warn = 0;
1840 mutex_lock(&ls->ls_timeout_mutex);
1841 list_for_each_entry(iter, &ls->ls_timeout, lkb_time_list) {
1842
1843 wait_us = ktime_to_us(ktime_sub(ktime_get(),
1844 iter->lkb_timestamp));
1845
1846 if ((iter->lkb_exflags & DLM_LKF_TIMEOUT) &&
1847 wait_us >= (iter->lkb_timeout_cs * 10000))
3ae1acf9
DT
1848 do_cancel = 1;
1849
1850 if ((iter->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
1851 wait_us >= dlm_config.ci_timewarn_cs * 10000)
1852 do_warn = 1;
1853
1854 if (!do_cancel && !do_warn)
1855 continue;
1856 hold_lkb(iter);
1857 lkb = iter;
1858 break;
1859 }
1860 mutex_unlock(&ls->ls_timeout_mutex);
1861
1862 if (!lkb)
1863 break;
1864
1865 r = lkb->lkb_resource;
1866 hold_rsb(r);
1867 lock_rsb(r);
1868
1869 if (do_warn) {
1870 /* clear flag so we only warn once */
1871 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1872 if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
1873 del_timeout(lkb);
1874 dlm_timeout_warn(lkb);
1875 }
1876
1877 if (do_cancel) {
1878 log_debug(ls, "timeout cancel %x node %d %s",
1879 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
1880 lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
1881 lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
1882 del_timeout(lkb);
1883 _cancel_lock(r, lkb);
1884 }
1885
1886 unlock_rsb(r);
1887 unhold_rsb(r);
1888 dlm_put_lkb(lkb);
1889 }
1890}
1891
1892/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
1893 dlm_recoverd before checking/setting ls_recover_begin. */
1894
1895void dlm_adjust_timeouts(struct dlm_ls *ls)
1896{
1897 struct dlm_lkb *lkb;
1898 u64 adj_us = jiffies_to_usecs(jiffies - ls->ls_recover_begin);
1899
1900 ls->ls_recover_begin = 0;
1901 mutex_lock(&ls->ls_timeout_mutex);
1902 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1903 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
1904 mutex_unlock(&ls->ls_timeout_mutex);
1905}
1906#else
1907static void add_timeout(struct dlm_lkb *lkb) { }
1908static void del_timeout(struct dlm_lkb *lkb) { }
1909#endif
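/*
 * Sketch (assumed helper, not in the dlm code): the deprecated timeout
 * fields above are in centiseconds, so dlm_scan_timeout() scales by
 * 10000 to compare against ktime microsecond deltas:
 * 1 cs = 10 ms = 10000 us.
 */
#if 0
static inline s64 example_cs_to_us(u32 cs)
{
	return (s64)cs * 10000;	/* centiseconds -> microseconds */
}
#endif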
1910
1911/* lkb is master or local copy */
1912
1913static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1914{
1915 int b, len = r->res_ls->ls_lvblen;
1916
1917 /* b=1 lvb returned to caller
1918 b=0 lvb written to rsb or invalidated
1919 b=-1 do nothing */
1920
1921 b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
1922
1923 if (b == 1) {
1924 if (!lkb->lkb_lvbptr)
1925 return;
1926
1927 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
1928 return;
1929
1930 if (!r->res_lvbptr)
1931 return;
1932
1933 memcpy(lkb->lkb_lvbptr, r->res_lvbptr, len);
1934 lkb->lkb_lvbseq = r->res_lvbseq;
1935
1936 } else if (b == 0) {
1937 if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
1938 rsb_set_flag(r, RSB_VALNOTVALID);
1939 return;
1940 }
1941
1942 if (!lkb->lkb_lvbptr)
1943 return;
1944
1945 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
1946 return;
1947
1948 if (!r->res_lvbptr)
1949 r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
1950
1951 if (!r->res_lvbptr)
1952 return;
1953
1954 memcpy(r->res_lvbptr, lkb->lkb_lvbptr, len);
1955 r->res_lvbseq++;
1956 lkb->lkb_lvbseq = r->res_lvbseq;
1957 rsb_clear_flag(r, RSB_VALNOTVALID);
1958 }
1959
1960 if (rsb_flag(r, RSB_VALNOTVALID))
1961 lkb->lkb_sbflags |= DLM_SBF_VALNOTVALID;
1962}
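/*
 * Sketch: how the dlm_lvb_operations table from lvb_table.h is indexed.
 * Lock modes start at DLM_LOCK_IV == -1, hence the "+ 1" offsets above.
 * The example values below are an assumption based on the b=1/b=0
 * semantics documented in set_lvb_lock():
 */
#if 0
/* granted up-conversion PR -> EX: lvb is copied back to the caller */
int b1 = dlm_lvb_operations[DLM_LOCK_PR + 1][DLM_LOCK_EX + 1];	/* 1 */
/* down-conversion EX -> NL: caller's lvb is written into the rsb */
int b2 = dlm_lvb_operations[DLM_LOCK_EX + 1][DLM_LOCK_NL + 1];	/* 0 */
#endif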
1963
1964static void set_lvb_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
1965{
1966 if (lkb->lkb_grmode < DLM_LOCK_PW)
1967 return;
1968
1969 if (lkb->lkb_exflags & DLM_LKF_IVVALBLK) {
1970 rsb_set_flag(r, RSB_VALNOTVALID);
1971 return;
1972 }
1973
1974 if (!lkb->lkb_lvbptr)
1975 return;
1976
1977 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
1978 return;
1979
1980 if (!r->res_lvbptr)
1981 r->res_lvbptr = dlm_allocate_lvb(r->res_ls);
1982
1983 if (!r->res_lvbptr)
1984 return;
1985
1986 memcpy(r->res_lvbptr, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
1987 r->res_lvbseq++;
1988 rsb_clear_flag(r, RSB_VALNOTVALID);
1989}
1990
1991/* lkb is process copy (pc) */
1992
1993static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
1994 struct dlm_message *ms)
1995{
1996 int b;
1997
1998 if (!lkb->lkb_lvbptr)
1999 return;
2000
2001 if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
2002 return;
2003
2004 b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
2005 if (b == 1) {
2006 int len = receive_extralen(ms);
2007 if (len > r->res_ls->ls_lvblen)
2008 len = r->res_ls->ls_lvblen;
2009 memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
2010 lkb->lkb_lvbseq = le32_to_cpu(ms->m_lvbseq);
2011 }
2012}
2013
2014/* Manipulate lkb's on rsb's convert/granted/waiting queues
2015 remove_lock -- used for unlock, removes lkb from granted
2016 revert_lock -- used for cancel, moves lkb from convert to granted
2017 grant_lock -- used for request and convert, adds lkb to granted or
2018 moves lkb from convert or waiting to granted
2019
2020 Each of these is used for master or local copy lkb's. There is
2021 also a _pc() variation used to make the corresponding change on
2022 a process copy (pc) lkb. */
2023
2024static void _remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2025{
2026 del_lkb(r, lkb);
2027 lkb->lkb_grmode = DLM_LOCK_IV;
2028 /* this unhold undoes the original ref from create_lkb()
2029 so this leads to the lkb being freed */
2030 unhold_lkb(lkb);
2031}
2032
2033static void remove_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2034{
2035 set_lvb_unlock(r, lkb);
2036 _remove_lock(r, lkb);
2037}
2038
2039static void remove_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
2040{
2041 _remove_lock(r, lkb);
2042}
2043
2044/* returns: 0 did nothing
2045 1 moved lock to granted
2046 -1 removed lock */
2047
2048static int revert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2049 {
2050 int rv = 0;
2051
2052 lkb->lkb_rqmode = DLM_LOCK_IV;
2053
2054 switch (lkb->lkb_status) {
2055 case DLM_LKSTS_GRANTED:
2056 break;
2057 case DLM_LKSTS_CONVERT:
2058 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
2059 rv = 1;
2060 break;
2061 case DLM_LKSTS_WAITING:
2062 del_lkb(r, lkb);
2063 lkb->lkb_grmode = DLM_LOCK_IV;
2064 /* this unhold undoes the original ref from create_lkb()
2065 so this leads to the lkb being freed */
2066 unhold_lkb(lkb);
2067 rv = -1;
2068 break;
2069 default:
2070 log_print("invalid status for revert %d", lkb->lkb_status);
2071 }
2072 return rv;
2073}
2074
2075 static int revert_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb)
2076 {
2077 return revert_lock(r, lkb);
2078}
2079
2080static void _grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2081{
2082 if (lkb->lkb_grmode != lkb->lkb_rqmode) {
2083 lkb->lkb_grmode = lkb->lkb_rqmode;
2084 if (lkb->lkb_status)
2085 move_lkb(r, lkb, DLM_LKSTS_GRANTED);
2086 else
2087 add_lkb(r, lkb, DLM_LKSTS_GRANTED);
2088 }
2089
2090 lkb->lkb_rqmode = DLM_LOCK_IV;
2091 lkb->lkb_highbast = 0;
2092}
2093
2094static void grant_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
2095{
2096 set_lvb_lock(r, lkb);
2097 _grant_lock(r, lkb);
2098}
2099
2100static void grant_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb,
2101 struct dlm_message *ms)
2102{
2103 set_lvb_lock_pc(r, lkb, ms);
2104 _grant_lock(r, lkb);
2105}
2106
2107/* called by grant_pending_locks() which means an async grant message must
2108 be sent to the requesting node in addition to granting the lock if the
2109 lkb belongs to a remote node. */
2110
2111static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
2112{
2113 grant_lock(r, lkb);
2114 if (is_master_copy(lkb))
2115 send_grant(r, lkb);
2116 else
2117 queue_cast(r, lkb, 0);
2118}
2119
2120/* The special CONVDEADLK, ALTPR and ALTCW flags allow the master to
2121 change the granted/requested modes. We're munging things accordingly in
2122 the process copy.
2123 CONVDEADLK: our grmode may have been forced down to NL to resolve a
2124 conversion deadlock
2125 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
2126 compatible with other granted locks */
2127
2128 static void munge_demoted(struct dlm_lkb *lkb)
2129 {
2130 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
2131 log_print("munge_demoted %x invalid modes gr %d rq %d",
2132 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
2133 return;
2134 }
2135
2136 lkb->lkb_grmode = DLM_LOCK_NL;
2137}
2138
2139static void munge_altmode(struct dlm_lkb *lkb, struct dlm_message *ms)
2140{
2141 if (ms->m_type != cpu_to_le32(DLM_MSG_REQUEST_REPLY) &&
2142 ms->m_type != cpu_to_le32(DLM_MSG_GRANT)) {
2143 log_print("munge_altmode %x invalid reply type %d",
2144 lkb->lkb_id, le32_to_cpu(ms->m_type));
2145 return;
2146 }
2147
2148 if (lkb->lkb_exflags & DLM_LKF_ALTPR)
2149 lkb->lkb_rqmode = DLM_LOCK_PR;
2150 else if (lkb->lkb_exflags & DLM_LKF_ALTCW)
2151 lkb->lkb_rqmode = DLM_LOCK_CW;
2152 else {
2153 log_print("munge_altmode invalid exflags %x", lkb->lkb_exflags);
2154 dlm_print_lkb(lkb);
2155 }
2156}
2157
2158static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head)
2159{
2160 struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb,
2161 lkb_statequeue);
2162 if (lkb->lkb_id == first->lkb_id)
2163 return 1;
2164
2165 return 0;
2166}
2167
2168/* Check if the given lkb conflicts with another lkb on the queue. */
2169
2170static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
2171{
2172 struct dlm_lkb *this;
2173
2174 list_for_each_entry(this, head, lkb_statequeue) {
2175 if (this == lkb)
2176 continue;
2177 if (!modes_compat(this, lkb))
2178 return 1;
2179 }
2180 return 0;
2181}
2182
2183/*
2184 * "A conversion deadlock arises with a pair of lock requests in the converting
2185 * queue for one resource. The granted mode of each lock blocks the requested
2186 * mode of the other lock."
2187 *
2188 * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
2189 * convert queue from being granted, then deadlk/demote lkb.
2190 *
2191 * Example:
2192 * Granted Queue: empty
2193 * Convert Queue: NL->EX (first lock)
2194 * PR->EX (second lock)
2195 *
2196 * The first lock can't be granted because of the granted mode of the second
2197 * lock and the second lock can't be granted because it's not first in the
2198 * list. We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
2199 * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
2200 * flag set and return DEMOTED in the lksb flags.
2201 *
2202 * Originally, this function detected conv-deadlk in a more limited scope:
2203 * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
2204 * - if lkb1 was the first entry in the queue (not just earlier), and was
2205 * blocked by the granted mode of lkb2, and there was nothing on the
2206 * granted queue preventing lkb1 from being granted immediately, i.e.
2207 * lkb2 was the only thing preventing lkb1 from being granted.
2208 *
2209 * That second condition meant we'd only say there was conv-deadlk if
2210 * resolving it (by demotion) would lead to the first lock on the convert
2211 * queue being granted right away. It allowed conversion deadlocks to exist
2212 * between locks on the convert queue while they couldn't be granted anyway.
2213 *
2214 * Now, we detect and take action on conversion deadlocks immediately when
2215 * they're created, even if they may not be immediately consequential. If
2216 * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
2217 * mode that would prevent lkb1's conversion from being granted, we do a
2218 * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
2219 * I think this means that the lkb_is_ahead condition below should always
2220 * be zero, i.e. there will never be conv-deadlk between two locks that are
2221 * both already on the convert queue.
2222 */
2223
2224 static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
2225 {
2226 struct dlm_lkb *lkb1;
2227 int lkb_is_ahead = 0;
2228
2229 list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
2230 if (lkb1 == lkb2) {
2231 lkb_is_ahead = 1;
2232 continue;
2233 }
2234
2235 if (!lkb_is_ahead) {
2236 if (!modes_compat(lkb2, lkb1))
2237 return 1;
2238 } else {
2239 if (!modes_compat(lkb2, lkb1) &&
2240 !modes_compat(lkb1, lkb2))
2241 return 1;
2242 }
2243 }
2244 return 0;
2245}
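/*
 * Sketch: the queue from the comment above, expressed against the
 * compatibility matrix (1 == compatible).  With NL->EX first and PR->EX
 * second on the convert queue, each lock's granted mode blocks the
 * other's requested mode:
 */
#if 0
/* PR (granted, second lock) is incompatible with EX (requested, first) */
WARN_ON(__dlm_compat_matrix[DLM_LOCK_PR + 1][DLM_LOCK_EX + 1] != 0);
/* so conversion_deadlock_detect() returns 1 when the PR->EX convert
   arrives, and it is resolved by demotion (CONVDEADLK) or -EDEADLK */
#endif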
2246
2247/*
2248 * Return 1 if the lock can be granted, 0 otherwise.
2249 * Also detect and resolve conversion deadlocks.
2250 *
2251 * lkb is the lock to be granted
2252 *
2253 * now is 1 if the function is being called in the context of the
2254 * immediate request, it is 0 if called later, after the lock has been
2255 * queued.
2256 *
2257 * recover is 1 if dlm_recover_grant() is trying to grant conversions
2258 * after recovery.
2259 *
2260 * References are from chapter 6 of "VAXcluster Principles" by Roy Davis
2261 */
2262
2263static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
2264 int recover)
2265{
2266 int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV);
2267
2268 /*
2269 * 6-10: Version 5.4 introduced an option to address the phenomenon of
2270 * a new request for a NL mode lock being blocked.
2271 *
2272 * 6-11: If the optional EXPEDITE flag is used with the new NL mode
2273 * request, then it would be granted. In essence, the use of this flag
2274 * tells the Lock Manager to expedite this request by not considering
2275 * what may be in the CONVERTING or WAITING queues... As of this
2276 * writing, the EXPEDITE flag can be used only with new requests for NL
2277 * mode locks. This flag is not valid for conversion requests.
2278 *
2279 * A shortcut. Earlier checks return an error if EXPEDITE is used in a
2280 * conversion or used with a non-NL requested mode. We also know an
2281 * EXPEDITE request is always granted immediately, so now must always
2282 * be 1. The full condition to grant an expedite request: (now &&
2283 * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can
2284 * therefore be shortened to just checking the flag.
2285 */
2286
2287 if (lkb->lkb_exflags & DLM_LKF_EXPEDITE)
2288 return 1;
2289
2290 /*
2291 * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be
2292 * added to the remaining conditions.
2293 */
2294
2295 if (queue_conflict(&r->res_grantqueue, lkb))
2296 return 0;
2297
2298 /*
2299 * 6-3: By default, a conversion request is immediately granted if the
2300 * requested mode is compatible with the modes of all other granted
2301 * locks
2302 */
2303
2304 if (queue_conflict(&r->res_convertqueue, lkb))
2305 return 0;
2306
2307 /*
2308 * The RECOVER_GRANT flag means dlm_recover_grant() is granting
2309 * locks for a recovered rsb, on which lkb's have been rebuilt.
2310 * The lkb's may have been rebuilt on the queues in a different
2311 * order than they were in on the previous master. So, granting
2312 * queued conversions in order after recovery doesn't make sense
2313 * since the order hasn't been preserved anyway. The new order
2314 * could also have created a new "in place" conversion deadlock.
2315 * (e.g. old, failed master held granted EX, with PR->EX, NL->EX.
2316 * After recovery, there would be no granted locks, and possibly
2317 * NL->EX, PR->EX, an in-place conversion deadlock.) So, after
2318 * recovery, grant conversions without considering order.
2319 */
2320
2321 if (conv && recover)
2322 return 1;
2323
2324 /*
2325 * 6-5: But the default algorithm for deciding whether to grant or
2326 * queue conversion requests does not by itself guarantee that such
2327 * requests are serviced on a "first come first serve" basis. This, in
2328 * turn, can lead to a phenomenon known as "indefinite postponement".
2329 *
2330 * 6-7: This issue is dealt with by using the optional QUECVT flag with
2331 * the system service employed to request a lock conversion. This flag
2332 * forces certain conversion requests to be queued, even if they are
2333 * compatible with the granted modes of other locks on the same
2334 * resource. Thus, the use of this flag results in conversion requests
2335 * being ordered on a "first come first serve" basis.
2336 *
2337 * DCT: This condition is all about new conversions being able to occur
2338 * "in place" while the lock remains on the granted queue (assuming
2339 * nothing else conflicts.) IOW if QUECVT isn't set, a conversion
2340 * doesn't _have_ to go onto the convert queue where it's processed in
2341 * order. The "now" variable is necessary to distinguish converts
2342 * being received and processed for the first time now, because once a
2343 * convert is moved to the conversion queue the condition below applies
2344 * requiring fifo granting.
2345 */
2346
2347 if (now && conv && !(lkb->lkb_exflags & DLM_LKF_QUECVT))
2348 return 1;
2349
2350 /*
2351 * Even if the convert is compat with all granted locks,
2352 * QUECVT forces it behind other locks on the convert queue.
2353 */
2354
2355 if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) {
2356 if (list_empty(&r->res_convertqueue))
2357 return 1;
2358 else
2359 return 0;
2360 }
2361
e7fd4179 2362 /*
2363 * The NOORDER flag is set to avoid the standard vms rules on grant
2364 * order.
2365 */
2366
2367 if (lkb->lkb_exflags & DLM_LKF_NOORDER)
2368 return 1;
2369
2370 /*
2371 * 6-3: Once in that queue [CONVERTING], a conversion request cannot be
2372 * granted until all other conversion requests ahead of it are granted
2373 * and/or canceled.
2374 */
2375
2376 if (!now && conv && first_in_list(lkb, &r->res_convertqueue))
2377 return 1;
2378
2379 /*
2380 * 6-4: By default, a new request is immediately granted only if all
2381 * three of the following conditions are satisfied when the request is
2382 * issued:
2383 * - The queue of ungranted conversion requests for the resource is
2384 * empty.
2385 * - The queue of ungranted new requests for the resource is empty.
2386 * - The mode of the new request is compatible with the most
2387 * restrictive mode of all granted locks on the resource.
2388 */
2389
2390 if (now && !conv && list_empty(&r->res_convertqueue) &&
2391 list_empty(&r->res_waitqueue))
2392 return 1;
2393
2394 /*
2395 * 6-4: Once a lock request is in the queue of ungranted new requests,
2396 * it cannot be granted until the queue of ungranted conversion
2397 * requests is empty, all ungranted new requests ahead of it are
2398 * granted and/or canceled, and it is compatible with the granted mode
2399 * of the most restrictive lock granted on the resource.
2400 */
2401
2402 if (!now && !conv && list_empty(&r->res_convertqueue) &&
2403 first_in_list(lkb, &r->res_waitqueue))
2404 return 1;
2405
2406 return 0;
2407}
2408
2409 static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
2410 int recover, int *err)
2411 {
2412 int rv;
2413 int8_t alt = 0, rqmode = lkb->lkb_rqmode;
2414 int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
2415
2416 if (err)
2417 *err = 0;
2418
2419 rv = _can_be_granted(r, lkb, now, recover);
2420 if (rv)
2421 goto out;
2422
2423 /*
2424 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
2425 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
2426 * cancels one of the locks.
2427 */
2428
2429 if (is_convert && can_be_queued(lkb) &&
2430 conversion_deadlock_detect(r, lkb)) {
2431 if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
2432 lkb->lkb_grmode = DLM_LOCK_NL;
2433 lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
2434 } else if (err) {
2435 *err = -EDEADLK;
2436 } else {
2437 log_print("can_be_granted deadlock %x now %d",
2438 lkb->lkb_id, now);
2439 dlm_dump_rsb(r);
2440 }
2441 goto out;
2442 }
2443
2444 /*
2445 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
2446 * to grant a request in a mode other than the normal rqmode. It's a
2447 * simple way to provide a big optimization to applications that can
2448 * use them.
2449 */
2450
2451 if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
2452 alt = DLM_LOCK_PR;
2453 else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
2454 alt = DLM_LOCK_CW;
2455
2456 if (alt) {
2457 lkb->lkb_rqmode = alt;
2458 rv = _can_be_granted(r, lkb, now, 0);
2459 if (rv)
2460 lkb->lkb_sbflags |= DLM_SBF_ALTMODE;
2461 else
2462 lkb->lkb_rqmode = rqmode;
2463 }
2464 out:
2465 return rv;
2466}
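/*
 * Sketch (hypothetical caller): when ALTPR/ALTCW rewrite rqmode, the
 * only evidence visible to the caller is DLM_SBF_ALTMODE in the lksb,
 * so an application using these flags must check which mode it holds:
 */
#if 0
if (lksb.sb_flags & DLM_SBF_ALTMODE)
	/* granted in the alternate mode, not the one requested */
	mode_held = (flags & DLM_LKF_ALTPR) ? DLM_LOCK_PR : DLM_LOCK_CW;
#endif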
2467
2468/* Returns the highest requested mode of all blocked conversions; sets
2469 cw if there's a blocked conversion to DLM_LOCK_CW. */
2470
2471static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw,
2472 unsigned int *count)
2473{
2474 struct dlm_lkb *lkb, *s;
2475 int recover = rsb_flag(r, RSB_RECOVER_GRANT);
2476 int hi, demoted, quit, grant_restart, demote_restart;
2477 int deadlk;
2478
2479 quit = 0;
2480 restart:
2481 grant_restart = 0;
2482 demote_restart = 0;
2483 hi = DLM_LOCK_IV;
2484
2485 list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
2486 demoted = is_demoted(lkb);
2487 deadlk = 0;
2488
2489 if (can_be_granted(r, lkb, 0, recover, &deadlk)) {
2490 grant_lock_pending(r, lkb);
2491 grant_restart = 1;
2492 if (count)
2493 (*count)++;
2494 continue;
2495 }
2496
2497 if (!demoted && is_demoted(lkb)) {
2498 log_print("WARN: pending demoted %x node %d %s",
2499 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
2500 demote_restart = 1;
2501 continue;
2502 }
2503
2504 if (deadlk) {
2505 /*
2506 * If DLM_LKB_NODLKWT flag is set and conversion
2507 * deadlock is detected, we request blocking AST and
2508 * down (or cancel) conversion.
2509 */
2510 if (lkb->lkb_exflags & DLM_LKF_NODLCKWT) {
2511 if (lkb->lkb_highbast < lkb->lkb_rqmode) {
2512 queue_bast(r, lkb, lkb->lkb_rqmode);
2513 lkb->lkb_highbast = lkb->lkb_rqmode;
2514 }
2515 } else {
2516 log_print("WARN: pending deadlock %x node %d %s",
2517 lkb->lkb_id, lkb->lkb_nodeid,
2518 r->res_name);
2519 dlm_dump_rsb(r);
2520 }
2521 continue;
2522 }
2523
2524 hi = max_t(int, lkb->lkb_rqmode, hi);
2525
2526 if (cw && lkb->lkb_rqmode == DLM_LOCK_CW)
2527 *cw = 1;
2528 }
2529
2530 if (grant_restart)
2531 goto restart;
2532 if (demote_restart && !quit) {
2533 quit = 1;
2534 goto restart;
2535 }
2536
2537 return max_t(int, high, hi);
2538}
2539
2540static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw,
2541 unsigned int *count)
2542{
2543 struct dlm_lkb *lkb, *s;
2544
2545 list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
2546 if (can_be_granted(r, lkb, 0, 0, NULL)) {
2547 grant_lock_pending(r, lkb);
2548 if (count)
2549 (*count)++;
2550 } else {
2551 high = max_t(int, lkb->lkb_rqmode, high);
2552 if (lkb->lkb_rqmode == DLM_LOCK_CW)
2553 *cw = 1;
2554 }
2555 }
2556
2557 return high;
2558}
2559
2560/* cw of 1 means there's a lock with a rqmode of DLM_LOCK_CW that's blocked
2561 on either the convert or waiting queue.
2562 high is the largest rqmode of all locks blocked on the convert or
2563 waiting queue. */
2564
2565static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw)
2566{
2567 if (gr->lkb_grmode == DLM_LOCK_PR && cw) {
2568 if (gr->lkb_highbast < DLM_LOCK_EX)
2569 return 1;
2570 return 0;
2571 }
2572
2573 if (gr->lkb_highbast < high &&
2574 !__dlm_compat_matrix[gr->lkb_grmode+1][high+1])
2575 return 1;
2576 return 0;
2577}
2578
2579 static void grant_pending_locks(struct dlm_rsb *r, unsigned int *count)
2580{
2581 struct dlm_lkb *lkb, *s;
2582 int high = DLM_LOCK_IV;
2583 int cw = 0;
2584
2585 if (!is_master(r)) {
2586 log_print("grant_pending_locks r nodeid %d", r->res_nodeid);
2587 dlm_dump_rsb(r);
2588 return;
2589 }
2590
2591 high = grant_pending_convert(r, high, &cw, count);
2592 high = grant_pending_wait(r, high, &cw, count);
2593
2594 if (high == DLM_LOCK_IV)
2595 return;
2596
2597 /*
2598 * If there are locks left on the wait/convert queue then send blocking
2599 * ASTs to granted locks based on the largest requested mode (high)
2600 * found above.
2601 */
2602
2603 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
2604 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
2605 if (cw && high == DLM_LOCK_PR &&
2606 lkb->lkb_grmode == DLM_LOCK_PR)
2607 queue_bast(r, lkb, DLM_LOCK_CW);
2608 else
2609 queue_bast(r, lkb, high);
2610 lkb->lkb_highbast = high;
2611 }
2612 }
2613}
2614
2615static int modes_require_bast(struct dlm_lkb *gr, struct dlm_lkb *rq)
2616{
2617 if ((gr->lkb_grmode == DLM_LOCK_PR && rq->lkb_rqmode == DLM_LOCK_CW) ||
2618 (gr->lkb_grmode == DLM_LOCK_CW && rq->lkb_rqmode == DLM_LOCK_PR)) {
2619 if (gr->lkb_highbast < DLM_LOCK_EX)
2620 return 1;
2621 return 0;
2622 }
2623
2624 if (gr->lkb_highbast < rq->lkb_rqmode && !modes_compat(gr, rq))
2625 return 1;
2626 return 0;
2627}
2628
2629static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
2630 struct dlm_lkb *lkb)
2631{
2632 struct dlm_lkb *gr;
2633
2634 list_for_each_entry(gr, head, lkb_statequeue) {
2635 /* skip self when sending basts to convertqueue */
2636 if (gr == lkb)
2637 continue;
2638 if (gr->lkb_bastfn && modes_require_bast(gr, lkb)) {
2639 queue_bast(r, gr, lkb->lkb_rqmode);
2640 gr->lkb_highbast = lkb->lkb_rqmode;
2641 }
2642 }
2643}
2644
2645static void send_blocking_asts(struct dlm_rsb *r, struct dlm_lkb *lkb)
2646{
2647 send_bast_queue(r, &r->res_grantqueue, lkb);
2648}
2649
2650static void send_blocking_asts_all(struct dlm_rsb *r, struct dlm_lkb *lkb)
2651{
2652 send_bast_queue(r, &r->res_grantqueue, lkb);
2653 send_bast_queue(r, &r->res_convertqueue, lkb);
2654}
2655
2656/* set_master(r, lkb) -- set the master nodeid of a resource
2657
2658 The purpose of this function is to set the nodeid field in the given
2659 lkb using the nodeid field in the given rsb. If the rsb's nodeid is
2660 known, it can just be copied to the lkb and the function will return
2661 0. If the rsb's nodeid is _not_ known, it needs to be looked up
2662 before it can be copied to the lkb.
2663
2664 When the rsb nodeid is being looked up remotely, the initial lkb
2665 causing the lookup is kept on the ls_waiters list waiting for the
2666 lookup reply. Other lkb's waiting for the same rsb lookup are kept
2667 on the rsb's res_lookup list until the master is verified.
2668
2669 Return values:
2670 0: nodeid is set in rsb/lkb and the caller should go ahead and use it
2671 1: the rsb master is not available and the lkb has been placed on
2672 a wait queue
2673*/
2674
2675static int set_master(struct dlm_rsb *r, struct dlm_lkb *lkb)
2676{
2677 int our_nodeid = dlm_our_nodeid();
2678
2679 if (rsb_flag(r, RSB_MASTER_UNCERTAIN)) {
2680 rsb_clear_flag(r, RSB_MASTER_UNCERTAIN);
2681 r->res_first_lkid = lkb->lkb_id;
2682 lkb->lkb_nodeid = r->res_nodeid;
2683 return 0;
2684 }
2685
2686 if (r->res_first_lkid && r->res_first_lkid != lkb->lkb_id) {
2687 list_add_tail(&lkb->lkb_rsb_lookup, &r->res_lookup);
2688 return 1;
2689 }
2690
2691 if (r->res_master_nodeid == our_nodeid) {
2692 lkb->lkb_nodeid = 0;
2693 return 0;
2694 }
2695
2696 if (r->res_master_nodeid) {
2697 lkb->lkb_nodeid = r->res_master_nodeid;
2698 return 0;
2699 }
2700
2701 if (dlm_dir_nodeid(r) == our_nodeid) {
2702 /* This is a somewhat unusual case; find_rsb will usually
2703 have set res_master_nodeid when dir nodeid is local, but
2704 there are cases where we become the dir node after we've
2705 past find_rsb and go through _request_lock again.
2706 confirm_master() or process_lookup_list() needs to be
2707 called after this. */
2708 log_debug(r->res_ls, "set_master %x self master %d dir %d %s",
2709 lkb->lkb_id, r->res_master_nodeid, r->res_dir_nodeid,
2710 r->res_name);
2711 r->res_master_nodeid = our_nodeid;
2712 r->res_nodeid = 0;
2713 lkb->lkb_nodeid = 0;
2714 return 0;
e7fd4179 2715 }
2716
2717 wait_pending_remove(r);
2718
2719 r->res_first_lkid = lkb->lkb_id;
2720 send_lookup(r, lkb);
2721 return 1;
2722}
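/*
 * Sketch: the pattern the stage-3 functions below follow when consuming
 * set_master()'s return value (condensed from _request_lock()):
 */
#if 0
error = set_master(r, lkb);
if (error < 0)
	return error;		/* hard failure */
if (error)
	return 0;		/* lkb parked until the lookup reply arrives */
/* error == 0: lkb->lkb_nodeid is valid; call send_xxxx() or do_xxxx() */
#endif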
2723
2724static void process_lookup_list(struct dlm_rsb *r)
2725{
2726 struct dlm_lkb *lkb, *safe;
2727
2728 list_for_each_entry_safe(lkb, safe, &r->res_lookup, lkb_rsb_lookup) {
2729 list_del_init(&lkb->lkb_rsb_lookup);
2730 _request_lock(r, lkb);
2731 schedule();
2732 }
2733}
2734
2735/* confirm_master -- confirm (or deny) an rsb's master nodeid */
2736
2737static void confirm_master(struct dlm_rsb *r, int error)
2738{
2739 struct dlm_lkb *lkb;
2740
2741 if (!r->res_first_lkid)
2742 return;
2743
2744 switch (error) {
2745 case 0:
2746 case -EINPROGRESS:
2747 r->res_first_lkid = 0;
2748 process_lookup_list(r);
2749 break;
2750
2751 case -EAGAIN:
2752 case -EBADR:
2753 case -ENOTBLK:
2754 /* the remote request failed and won't be retried (it was
2755 a NOQUEUE, or has been canceled/unlocked); make a waiting
2756 lkb the first_lkid */
2757
2758 r->res_first_lkid = 0;
2759
2760 if (!list_empty(&r->res_lookup)) {
2761 lkb = list_entry(r->res_lookup.next, struct dlm_lkb,
2762 lkb_rsb_lookup);
2763 list_del_init(&lkb->lkb_rsb_lookup);
2764 r->res_first_lkid = lkb->lkb_id;
2765 _request_lock(r, lkb);
2766 }
2767 break;
2768
2769 default:
2770 log_error(r->res_ls, "confirm_master unknown error %d", error);
2771 }
2772}
2773
2774#ifdef CONFIG_DLM_DEPRECATED_API
2775 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
2776 int namelen, unsigned long timeout_cs,
2777 void (*ast) (void *astparam),
2778 void *astparam,
2779 void (*bast) (void *astparam, int mode),
2780 struct dlm_args *args)
2781#else
2782static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
2783 int namelen, void (*ast)(void *astparam),
2784 void *astparam,
2785 void (*bast)(void *astparam, int mode),
2786 struct dlm_args *args)
2787#endif
2788{
2789 int rv = -EINVAL;
2790
2791 /* check for invalid arg usage */
2792
2793 if (mode < 0 || mode > DLM_LOCK_EX)
2794 goto out;
2795
2796 if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN))
2797 goto out;
2798
2799 if (flags & DLM_LKF_CANCEL)
2800 goto out;
2801
2802 if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT))
2803 goto out;
2804
2805 if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT))
2806 goto out;
2807
2808 if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE)
2809 goto out;
2810
2811 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT)
2812 goto out;
2813
2814 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT)
2815 goto out;
2816
2817 if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE)
2818 goto out;
2819
2820 if (flags & DLM_LKF_EXPEDITE && mode != DLM_LOCK_NL)
2821 goto out;
2822
2823 if (!ast || !lksb)
2824 goto out;
2825
2826 if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
2827 goto out;
2828
2829 if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
2830 goto out;
2831
2832 /* these args will be copied to the lkb in validate_lock_args,
2833 it cannot be done now because when converting locks, fields in
2834 an active lkb cannot be modified before locking the rsb */
2835
2836 args->flags = flags;
2837 args->astfn = ast;
2838 args->astparam = astparam;
2839 args->bastfn = bast;
2840#ifdef CONFIG_DLM_DEPRECATED_API
2841 args->timeout = timeout_cs;
2842#endif
2843 args->mode = mode;
2844 args->lksb = lksb;
2845 rv = 0;
2846 out:
2847 return rv;
2848}
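/*
 * Sketch (hypothetical caller, non-deprecated signature): the argument
 * checks above reject contradictory flag combinations up front, e.g.
 * EXPEDITE is only valid for a new NL request:
 */
#if 0
error = set_lock_args(DLM_LOCK_PR, &lksb, DLM_LKF_EXPEDITE, namelen,
		      my_ast, NULL, my_bast, &args);
/* error == -EINVAL: EXPEDITE with a mode other than DLM_LOCK_NL */
#endif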
2849
2850static int set_unlock_args(uint32_t flags, void *astarg, struct dlm_args *args)
2851{
2852 if (flags & ~(DLM_LKF_CANCEL | DLM_LKF_VALBLK | DLM_LKF_IVVALBLK |
2853 DLM_LKF_FORCEUNLOCK))
2854 return -EINVAL;
2855
2856 if (flags & DLM_LKF_CANCEL && flags & DLM_LKF_FORCEUNLOCK)
2857 return -EINVAL;
2858
2859 args->flags = flags;
2860 args->astparam = astarg;
2861 return 0;
2862}
2863
2864static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
2865 struct dlm_args *args)
2866{
2867 int rv = -EINVAL;
2868
2869 if (args->flags & DLM_LKF_CONVERT) {
2870 if (lkb->lkb_flags & DLM_IFL_MSTCPY)
2871 goto out;
2872
2873 if (args->flags & DLM_LKF_QUECVT &&
2874 !__quecvt_compat_matrix[lkb->lkb_grmode+1][args->mode+1])
2875 goto out;
2876
2877 rv = -EBUSY;
2878 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
2879 goto out;
2880
2881 /* lock not allowed if there's any op in progress */
2882 if (lkb->lkb_wait_type || lkb->lkb_wait_count)
2883 goto out;
2884
2885 if (is_overlap(lkb))
2886 goto out;
2887 }
2888
2889 lkb->lkb_exflags = args->flags;
2890 lkb->lkb_sbflags = 0;
2891 lkb->lkb_astfn = args->astfn;
2892 lkb->lkb_astparam = args->astparam;
2893 lkb->lkb_bastfn = args->bastfn;
2894 lkb->lkb_rqmode = args->mode;
2895 lkb->lkb_lksb = args->lksb;
2896 lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
2897 lkb->lkb_ownpid = (int) current->pid;
2898#ifdef CONFIG_DLM_DEPRECATED_API
2899 lkb->lkb_timeout_cs = args->timeout;
2900#endif
2901 rv = 0;
2902 out:
2903 if (rv)
2904 log_debug(ls, "validate_lock_args %d %x %x %x %d %d %s",
2905 rv, lkb->lkb_id, lkb->lkb_flags, args->flags,
2906 lkb->lkb_status, lkb->lkb_wait_type,
2907 lkb->lkb_resource->res_name);
2908 return rv;
2909}
2910
2911/* when dlm_unlock() sees -EBUSY with CANCEL/FORCEUNLOCK it returns 0
2912 for success */
2913
2914/* note: it's valid for lkb_nodeid/res_nodeid to be -1 when we get here
2915 because there may be a lookup in progress and it's valid to do
2916 cancel/unlockf on it */
2917
2918static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
2919{
2920 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
2921 int rv = -EINVAL;
2922
2923 if (lkb->lkb_flags & DLM_IFL_MSTCPY) {
2924 log_error(ls, "unlock on MSTCPY %x", lkb->lkb_id);
2925 dlm_print_lkb(lkb);
2926 goto out;
2927 }
2928
2929 /* an lkb may still exist even though the lock is EOL'ed due to a
2930 cancel, unlock or failed noqueue request; an app can't use these
2931 locks; return same error as if the lkid had not been found at all */
2932
2933 if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
2934 log_debug(ls, "unlock on ENDOFLIFE %x", lkb->lkb_id);
2935 rv = -ENOENT;
2936 goto out;
2937 }
2938
2939 /* an lkb may be waiting for an rsb lookup to complete where the
2940 lookup was initiated by another lock */
2941
2942 if (!list_empty(&lkb->lkb_rsb_lookup)) {
2943 if (args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) {
2944 log_debug(ls, "unlock on rsb_lookup %x", lkb->lkb_id);
2945 list_del_init(&lkb->lkb_rsb_lookup);
2946 queue_cast(lkb->lkb_resource, lkb,
2947 args->flags & DLM_LKF_CANCEL ?
2948 -DLM_ECANCEL : -DLM_EUNLOCK);
2949 unhold_lkb(lkb); /* undoes create_lkb() */
2950 }
2951 /* caller changes -EBUSY to 0 for CANCEL and FORCEUNLOCK */
2952 rv = -EBUSY;
2953 goto out;
2954 }
2955
2956 /* cancel not allowed with another cancel/unlock in progress */
2957
2958 if (args->flags & DLM_LKF_CANCEL) {
2959 if (lkb->lkb_exflags & DLM_LKF_CANCEL)
2960 goto out;
2961
2962 if (is_overlap(lkb))
2963 goto out;
2964
2965 /* don't let scand try to do a cancel */
2966 del_timeout(lkb);
2967
2968 if (lkb->lkb_flags & DLM_IFL_RESEND) {
2969 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
2970 rv = -EBUSY;
2971 goto out;
2972 }
2973
2974 /* there's nothing to cancel */
2975 if (lkb->lkb_status == DLM_LKSTS_GRANTED &&
2976 !lkb->lkb_wait_type) {
2977 rv = -EBUSY;
2978 goto out;
2979 }
2980
2981 switch (lkb->lkb_wait_type) {
2982 case DLM_MSG_LOOKUP:
2983 case DLM_MSG_REQUEST:
2984 lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
2985 rv = -EBUSY;
2986 goto out;
2987 case DLM_MSG_UNLOCK:
2988 case DLM_MSG_CANCEL:
2989 goto out;
2990 }
2991 /* add_to_waiters() will set OVERLAP_CANCEL */
2992 goto out_ok;
2993 }
2994
2995 /* do we need to allow a force-unlock if there's a normal unlock
2996 already in progress? in what conditions could the normal unlock
2997 fail such that we'd want to send a force-unlock to be sure? */
2998
2999 if (args->flags & DLM_LKF_FORCEUNLOCK) {
3000 if (lkb->lkb_exflags & DLM_LKF_FORCEUNLOCK)
3001 goto out;
3002
3003 if (is_overlap_unlock(lkb))
3004 goto out;
3005
3006 /* don't let scand try to do a cancel */
3007 del_timeout(lkb);
3008
3009 if (lkb->lkb_flags & DLM_IFL_RESEND) {
3010 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
3011 rv = -EBUSY;
3012 goto out;
3013 }
3014
3015 switch (lkb->lkb_wait_type) {
3016 case DLM_MSG_LOOKUP:
3017 case DLM_MSG_REQUEST:
3018 lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
3019 rv = -EBUSY;
3020 goto out;
3021 case DLM_MSG_UNLOCK:
3022 goto out;
3023 }
3024 /* add_to_waiters() will set OVERLAP_UNLOCK */
3025 goto out_ok;
3026 }
3027
3028 /* normal unlock not allowed if there's any op in progress */
3029 rv = -EBUSY;
3030 if (lkb->lkb_wait_type || lkb->lkb_wait_count)
3031 goto out;
3032
3033 out_ok:
3034 /* an overlapping op shouldn't blow away exflags from other op */
3035 lkb->lkb_exflags |= args->flags;
3036 lkb->lkb_sbflags = 0;
3037 lkb->lkb_astparam = args->astparam;
3038 rv = 0;
3039 out:
3040 if (rv)
3041 log_debug(ls, "validate_unlock_args %d %x %x %x %x %d %s", rv,
3042 lkb->lkb_id, lkb->lkb_flags, lkb->lkb_exflags,
3043 args->flags, lkb->lkb_wait_type,
3044 lkb->lkb_resource->res_name);
3045 return rv;
3046}
3047
3048/*
3049 * Four stage 4 varieties:
3050 * do_request(), do_convert(), do_unlock(), do_cancel()
3051 * These are called on the master node for the given lock and
3052 * from the central locking logic.
3053 */
3054
3055static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
3056{
3057 int error = 0;
3058
3059 if (can_be_granted(r, lkb, 1, 0, NULL)) {
3060 grant_lock(r, lkb);
3061 queue_cast(r, lkb, 0);
3062 goto out;
3063 }
3064
3065 if (can_be_queued(lkb)) {
3066 error = -EINPROGRESS;
3067 add_lkb(r, lkb, DLM_LKSTS_WAITING);
3068 add_timeout(lkb);
3069 goto out;
3070 }
3071
3072 error = -EAGAIN;
3073 queue_cast(r, lkb, -EAGAIN);
3074 out:
3075 return error;
3076}
3077
3078static void do_request_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3079 int error)
3080{
3081 switch (error) {
3082 case -EAGAIN:
3083 if (force_blocking_asts(lkb))
3084 send_blocking_asts_all(r, lkb);
3085 break;
3086 case -EINPROGRESS:
3087 send_blocking_asts(r, lkb);
3088 break;
3089 }
3090}
3091
3092static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
3093{
3094 int error = 0;
3095 int deadlk = 0;
3096
3097 /* changing an existing lock may allow others to be granted */
3098
3099 if (can_be_granted(r, lkb, 1, 0, &deadlk)) {
3100 grant_lock(r, lkb);
3101 queue_cast(r, lkb, 0);
3102 goto out;
3103 }
3104
3105 /* can_be_granted() detected that this lock would block in a conversion
3106 deadlock, so we leave it on the granted queue and return EDEADLK in
3107 the ast for the convert. */
3108
3109 if (deadlk && !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
3110 /* it's left on the granted queue */
3111 revert_lock(r, lkb);
3112 queue_cast(r, lkb, -EDEADLK);
3113 error = -EDEADLK;
3114 goto out;
3115 }
3116
7d3c1feb
DT
3117 /* is_demoted() means the can_be_granted() above set the grmode
3118 to NL, and left us on the granted queue. This auto-demotion
3119 (due to CONVDEADLK) might mean other locks, and/or this lock, are
3120 now grantable. We have to try to grant other converting locks
3121 before we try again to grant this one. */
3122
3123 if (is_demoted(lkb)) {
3124 grant_pending_convert(r, DLM_LOCK_IV, NULL, NULL);
3125 if (_can_be_granted(r, lkb, 1, 0)) {
3126 grant_lock(r, lkb);
3127 queue_cast(r, lkb, 0);
3128 goto out;
3129 }
3130 /* else fall through and move to convert queue */
3131 }
3132
3133 if (can_be_queued(lkb)) {
e7fd4179
DT
3134 error = -EINPROGRESS;
3135 del_lkb(r, lkb);
3136 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3137 add_timeout(lkb);
3138 goto out;
3139 }
3140
3141 error = -EAGAIN;
3142 queue_cast(r, lkb, -EAGAIN);
3143 out:
3144 return error;
3145}
3146
3147static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3148 int error)
3149{
3150 switch (error) {
3151 case 0:
3152 grant_pending_locks(r, NULL);
3153 /* grant_pending_locks also sends basts */
3154 break;
3155 case -EAGAIN:
3156 if (force_blocking_asts(lkb))
3157 send_blocking_asts_all(r, lkb);
3158 break;
3159 case -EINPROGRESS:
3160 send_blocking_asts(r, lkb);
3161 break;
3162 }
3163}
3164
3165static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3166{
3167 remove_lock(r, lkb);
3168 queue_cast(r, lkb, -DLM_EUNLOCK);
3169 return -DLM_EUNLOCK;
3170}
3171
3172static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3173 int error)
3174{
3175 grant_pending_locks(r, NULL);
3176}
3177
3178 /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
3179
3180static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
3181{
3182 int error;
3183
3184 error = revert_lock(r, lkb);
3185 if (error) {
3186 queue_cast(r, lkb, -DLM_ECANCEL);
3187 return -DLM_ECANCEL;
3188 }
3189 return 0;
3190}
3191
3192static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
3193 int error)
3194{
3195 if (error)
3196 grant_pending_locks(r, NULL);
3197}
3198
3199/*
3200 * Four stage 3 varieties:
3201 * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
3202 */
3203
3204/* add a new lkb to a possibly new rsb, called by requesting process */
3205
3206static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3207{
3208 int error;
3209
3210 /* set_master: sets lkb nodeid from r */
3211
3212 error = set_master(r, lkb);
3213 if (error < 0)
3214 goto out;
3215 if (error) {
3216 error = 0;
3217 goto out;
3218 }
3219
3220 if (is_remote(r)) {
3221 /* receive_request() calls do_request() on remote node */
3222 error = send_request(r, lkb);
3223 } else {
3224 error = do_request(r, lkb);
3225 /* for remote locks the request_reply is sent
3226 between do_request and do_request_effects */
3227 do_request_effects(r, lkb, error);
3228 }
3229 out:
3230 return error;
3231}
3232
3233 /* change some property of an existing lkb, e.g. mode */
3234
3235static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3236{
3237 int error;
3238
3239 if (is_remote(r)) {
3240 /* receive_convert() calls do_convert() on remote node */
3241 error = send_convert(r, lkb);
3242 } else {
3243 error = do_convert(r, lkb);
3244 /* for remote locks the convert_reply is sent
3245 between do_convert and do_convert_effects */
3246 do_convert_effects(r, lkb, error);
3247 }
3248
3249 return error;
3250}
3251
3252/* remove an existing lkb from the granted queue */
3253
3254static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3255{
3256 int error;
3257
3258 if (is_remote(r)) {
3259 /* receive_unlock() calls do_unlock() on remote node */
3260 error = send_unlock(r, lkb);
3261 } else {
3262 error = do_unlock(r, lkb);
3263 /* for remote locks the unlock_reply is sent
3264 between do_unlock and do_unlock_effects */
3265 do_unlock_effects(r, lkb, error);
3266 }
3267
3268 return error;
3269}
3270
3271/* remove an existing lkb from the convert or wait queue */
3272
3273static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3274{
3275 int error;
3276
3277 if (is_remote(r)) {
3278 /* receive_cancel() calls do_cancel() on remote node */
3279 error = send_cancel(r, lkb);
3280 } else {
3281 error = do_cancel(r, lkb);
3282 /* for remote locks the cancel_reply is sent
3283 between do_cancel and do_cancel_effects */
3284 do_cancel_effects(r, lkb, error);
3285 }
3286
3287 return error;
3288}
3289
3290/*
3291 * Four stage 2 varieties:
3292 * request_lock(), convert_lock(), unlock_lock(), cancel_lock()
3293 */
3294
3295static int request_lock(struct dlm_ls *ls, struct dlm_lkb *lkb, char *name,
3296 int len, struct dlm_args *args)
3297{
3298 struct dlm_rsb *r;
3299 int error;
3300
3301 error = validate_lock_args(ls, lkb, args);
3302 if (error)
3303 return error;
3304
3305 error = find_rsb(ls, name, len, 0, R_REQUEST, &r);
3306 if (error)
3307 return error;
3308
3309 lock_rsb(r);
3310
3311 attach_lkb(r, lkb);
3312 lkb->lkb_lksb->sb_lkid = lkb->lkb_id;
3313
3314 error = _request_lock(r, lkb);
3315
3316 unlock_rsb(r);
3317 put_rsb(r);
3318 return error;
3319}
3320
3321static int convert_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
3322 struct dlm_args *args)
3323{
3324 struct dlm_rsb *r;
3325 int error;
3326
3327 r = lkb->lkb_resource;
3328
3329 hold_rsb(r);
3330 lock_rsb(r);
3331
3332 error = validate_lock_args(ls, lkb, args);
3333 if (error)
3334 goto out;
3335
3336 error = _convert_lock(r, lkb);
3337 out:
3338 unlock_rsb(r);
3339 put_rsb(r);
3340 return error;
3341}
3342
3343static int unlock_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
3344 struct dlm_args *args)
3345{
3346 struct dlm_rsb *r;
3347 int error;
3348
3349 r = lkb->lkb_resource;
3350
3351 hold_rsb(r);
3352 lock_rsb(r);
3353
3354 error = validate_unlock_args(lkb, args);
3355 if (error)
3356 goto out;
3357
3358 error = _unlock_lock(r, lkb);
3359 out:
3360 unlock_rsb(r);
3361 put_rsb(r);
3362 return error;
3363}
3364
3365static int cancel_lock(struct dlm_ls *ls, struct dlm_lkb *lkb,
3366 struct dlm_args *args)
3367{
3368 struct dlm_rsb *r;
3369 int error;
3370
3371 r = lkb->lkb_resource;
3372
3373 hold_rsb(r);
3374 lock_rsb(r);
3375
3376 error = validate_unlock_args(lkb, args);
3377 if (error)
3378 goto out;
3379
3380 error = _cancel_lock(r, lkb);
3381 out:
3382 unlock_rsb(r);
3383 put_rsb(r);
3384 return error;
3385}
3386
3387/*
3388 * Two stage 1 varieties: dlm_lock() and dlm_unlock()
3389 */
3390
3391int dlm_lock(dlm_lockspace_t *lockspace,
3392 int mode,
3393 struct dlm_lksb *lksb,
3394 uint32_t flags,
3395 void *name,
3396 unsigned int namelen,
3397 uint32_t parent_lkid,
3398 void (*ast) (void *astarg),
3399 void *astarg,
3400 void (*bast) (void *astarg, int mode))
3401{
3402 struct dlm_ls *ls;
3403 struct dlm_lkb *lkb;
3404 struct dlm_args args;
3405 int error, convert = flags & DLM_LKF_CONVERT;
3406
3407 ls = dlm_find_lockspace_local(lockspace);
3408 if (!ls)
3409 return -EINVAL;
3410
3411 dlm_lock_recovery(ls);
3412
3413 if (convert)
3414 error = find_lkb(ls, lksb->sb_lkid, &lkb);
3415 else
3416 error = create_lkb(ls, &lkb);
3417
3418 if (error)
3419 goto out;
3420
3421 trace_dlm_lock_start(ls, lkb, name, namelen, mode, flags);
3422
3423#ifdef CONFIG_DLM_DEPRECATED_API
3424 error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
3425 astarg, bast, &args);
3426#else
3427 error = set_lock_args(mode, lksb, flags, namelen, ast, astarg, bast,
3428 &args);
3429#endif
3430 if (error)
3431 goto out_put;
3432
3433 if (convert)
3434 error = convert_lock(ls, lkb, &args);
3435 else
3436 error = request_lock(ls, lkb, name, namelen, &args);
3437
3438 if (error == -EINPROGRESS)
3439 error = 0;
3440 out_put:
3441 trace_dlm_lock_end(ls, lkb, name, namelen, mode, flags, error);
3442
3443 if (convert || error)
3444 __put_lkb(ls, lkb);
3445 if (error == -EAGAIN || error == -EDEADLK)
3446 error = 0;
3447 out:
3448 dlm_unlock_recovery(ls);
3449 dlm_put_lockspace(ls);
3450 return error;
3451}
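/*
 * Illustrative sketch (not part of this file): a minimal in-kernel
 * caller of dlm_lock()/dlm_unlock().  The lockspace is assumed to have
 * been created elsewhere with dlm_new_lockspace(); the names below are
 * hypothetical.  The ast fires once sb_status holds the final result.
 */
#if 0
static struct dlm_lksb example_lksb;
static char example_name[] = "example-res";
static DECLARE_COMPLETION(example_done);

static void example_ast(void *astarg)
{
	complete(&example_done);
}

static int example_take_ex_lock(dlm_lockspace_t *ls)
{
	int error;

	error = dlm_lock(ls, DLM_LOCK_EX, &example_lksb, 0,
			 example_name, strlen(example_name), 0,
			 example_ast, NULL, NULL);
	if (error)
		return error;

	wait_for_completion(&example_done);
	if (example_lksb.sb_status)
		return example_lksb.sb_status;

	/* ... resource is held EX here ... */

	return dlm_unlock(ls, example_lksb.sb_lkid, 0, &example_lksb, NULL);
}
#endif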
3452
3453int dlm_unlock(dlm_lockspace_t *lockspace,
3454 uint32_t lkid,
3455 uint32_t flags,
3456 struct dlm_lksb *lksb,
3457 void *astarg)
3458{
3459 struct dlm_ls *ls;
3460 struct dlm_lkb *lkb;
3461 struct dlm_args args;
3462 int error;
3463
3464 ls = dlm_find_lockspace_local(lockspace);
3465 if (!ls)
3466 return -EINVAL;
3467
3468 dlm_lock_recovery(ls);
3469
3470 error = find_lkb(ls, lkid, &lkb);
3471 if (error)
3472 goto out;
3473
3474 trace_dlm_unlock_start(ls, lkb, flags);
3475
3476 error = set_unlock_args(flags, astarg, &args);
3477 if (error)
3478 goto out_put;
3479
3480 if (flags & DLM_LKF_CANCEL)
3481 error = cancel_lock(ls, lkb, &args);
3482 else
3483 error = unlock_lock(ls, lkb, &args);
3484
3485 if (error == -DLM_EUNLOCK || error == -DLM_ECANCEL)
3486 error = 0;
3487 if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)))
3488 error = 0;
3489 out_put:
3490 trace_dlm_unlock_end(ls, lkb, flags, error);
3491
3492 dlm_put_lkb(lkb);
3493 out:
3494 dlm_unlock_recovery(ls);
3495 dlm_put_lockspace(ls);
3496 return error;
3497}
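/*
 * Sketch (hypothetical caller): canceling a request that is still
 * blocked.  Per the -EBUSY handling above, dlm_unlock() with
 * DLM_LKF_CANCEL returns 0 even when there is nothing left to cancel;
 * the completion ast reports the real outcome (-DLM_ECANCEL, or 0 if
 * the lock was granted before the cancel landed).
 */
#if 0
error = dlm_unlock(ls, lksb.sb_lkid, DLM_LKF_CANCEL, &lksb, NULL);
#endif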
3498
3499/*
3500 * send/receive routines for remote operations and replies
3501 *
3502 * send_args
3503 * send_common
3504 * send_request receive_request
3505 * send_convert receive_convert
3506 * send_unlock receive_unlock
3507 * send_cancel receive_cancel
3508 * send_grant receive_grant
3509 * send_bast receive_bast
3510 * send_lookup receive_lookup
3511 * send_remove receive_remove
3512 *
3513 * send_common_reply
3514 * receive_request_reply send_request_reply
3515 * receive_convert_reply send_convert_reply
3516 * receive_unlock_reply send_unlock_reply
3517 * receive_cancel_reply send_cancel_reply
3518 * receive_lookup_reply send_lookup_reply
3519 */
3520
3521static int _create_message(struct dlm_ls *ls, int mb_len,
3522 int to_nodeid, int mstype,
3523 struct dlm_message **ms_ret,
3524 struct dlm_mhandle **mh_ret)
3525{
3526 struct dlm_message *ms;
3527 struct dlm_mhandle *mh;
3528 char *mb;
3529
3530 /* get_buffer gives us a message handle (mh) that we need to
3531 pass into midcomms_commit and a message buffer (mb) that we
3532 write our data into */
3533
3534 mh = dlm_midcomms_get_mhandle(to_nodeid, mb_len, GFP_NOFS, &mb);
3535 if (!mh)
3536 return -ENOBUFS;
3537
3538 ms = (struct dlm_message *) mb;
3539
3540 ms->m_header.h_version = cpu_to_le32(DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
3541 ms->m_header.u.h_lockspace = cpu_to_le32(ls->ls_global_id);
3542 ms->m_header.h_nodeid = cpu_to_le32(dlm_our_nodeid());
3543 ms->m_header.h_length = cpu_to_le16(mb_len);
3544 ms->m_header.h_cmd = DLM_MSG;
3545
3546 ms->m_type = cpu_to_le32(mstype);
3547
3548 *mh_ret = mh;
3549 *ms_ret = ms;
3550 return 0;
3551}
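/*
 * Sketch: every dlm_message field travels little-endian on the wire, so
 * a receiver mirrors the cpu_to_le*() conversions made above:
 */
#if 0
uint16_t length = le16_to_cpu(ms->m_header.h_length);
uint32_t type = le32_to_cpu(ms->m_type);
#endif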
3552
7e4dac33
DT
3553static int create_message(struct dlm_rsb *r, struct dlm_lkb *lkb,
3554 int to_nodeid, int mstype,
3555 struct dlm_message **ms_ret,
3556 struct dlm_mhandle **mh_ret)
3557{
3558 int mb_len = sizeof(struct dlm_message);
3559
3560 switch (mstype) {
3561 case DLM_MSG_REQUEST:
3562 case DLM_MSG_LOOKUP:
3563 case DLM_MSG_REMOVE:
3564 mb_len += r->res_length;
3565 break;
3566 case DLM_MSG_CONVERT:
3567 case DLM_MSG_UNLOCK:
3568 case DLM_MSG_REQUEST_REPLY:
3569 case DLM_MSG_CONVERT_REPLY:
3570 case DLM_MSG_GRANT:
3571 if (lkb && lkb->lkb_lvbptr)
3572 mb_len += r->res_ls->ls_lvblen;
3573 break;
3574 }
3575
3576 return _create_message(r->res_ls, mb_len, to_nodeid, mstype,
3577 ms_ret, mh_ret);
3578}
3579
3580/* further lowcomms enhancements or alternate implementations may make
3581 the return value from this function useful at some point */
3582
3583static int send_message(struct dlm_mhandle *mh, struct dlm_message *ms)
3584{
3585 dlm_midcomms_commit_mhandle(mh);
3586 return 0;
3587}
3588
3589static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
3590 struct dlm_message *ms)
3591{
3592 ms->m_nodeid = cpu_to_le32(lkb->lkb_nodeid);
3593 ms->m_pid = cpu_to_le32(lkb->lkb_ownpid);
3594 ms->m_lkid = cpu_to_le32(lkb->lkb_id);
3595 ms->m_remid = cpu_to_le32(lkb->lkb_remid);
3596 ms->m_exflags = cpu_to_le32(lkb->lkb_exflags);
3597 ms->m_sbflags = cpu_to_le32(lkb->lkb_sbflags);
3598 ms->m_flags = cpu_to_le32(lkb->lkb_flags);
3599 ms->m_lvbseq = cpu_to_le32(lkb->lkb_lvbseq);
3600 ms->m_status = cpu_to_le32(lkb->lkb_status);
3601 ms->m_grmode = cpu_to_le32(lkb->lkb_grmode);
3602 ms->m_rqmode = cpu_to_le32(lkb->lkb_rqmode);
3603 ms->m_hash = cpu_to_le32(r->res_hash);
3604
3605 /* m_result and m_bastmode are set from function args,
3606 not from lkb fields */
3607
e5dae548 3608 if (lkb->lkb_bastfn)
00e99ccd 3609 ms->m_asts |= cpu_to_le32(DLM_CB_BAST);
e5dae548 3610 if (lkb->lkb_astfn)
00e99ccd 3611 ms->m_asts |= cpu_to_le32(DLM_CB_CAST);
e7fd4179 3612
3613 /* compare with switch in create_message; send_remove() doesn't
3614 use send_args() */
e7fd4179 3615
da49f36f 3616 switch (ms->m_type) {
3617 case cpu_to_le32(DLM_MSG_REQUEST):
3618 case cpu_to_le32(DLM_MSG_LOOKUP):
3619 memcpy(ms->m_extra, r->res_name, r->res_length);
3620 break;
3621 case cpu_to_le32(DLM_MSG_CONVERT):
3622 case cpu_to_le32(DLM_MSG_UNLOCK):
3623 case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
3624 case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
3625 case cpu_to_le32(DLM_MSG_GRANT):
3626 if (!lkb->lkb_lvbptr)
3627 break;
e7fd4179 3628 memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
3629 break;
3630 }
3631}
3632
3633static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
3634{
3635 struct dlm_message *ms;
3636 struct dlm_mhandle *mh;
3637 int to_nodeid, error;
3638
3639 to_nodeid = r->res_nodeid;
3640
3641 error = add_to_waiters(lkb, mstype, to_nodeid);
3642 if (error)
3643 return error;
e7fd4179 3644
3645 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
3646 if (error)
3647 goto fail;
3648
3649 send_args(r, lkb, ms);
3650
3651 error = send_message(mh, ms);
3652 if (error)
3653 goto fail;
3654 return 0;
3655
3656 fail:
ef0c2bb0 3657 remove_from_waiters(lkb, msg_reply_type(mstype));
3658 return error;
3659}
3660
3661static int send_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
3662{
3663 return send_common(r, lkb, DLM_MSG_REQUEST);
3664}
3665
3666static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
3667{
3668 int error;
3669
3670 error = send_common(r, lkb, DLM_MSG_CONVERT);
3671
3672 /* down conversions go without a reply from the master */
3673 if (!error && down_conversion(lkb)) {
ef0c2bb0 3674 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
3675 r->res_ls->ls_stub_ms.m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
3676 r->res_ls->ls_stub_ms.m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY);
3677 r->res_ls->ls_stub_ms.m_result = 0;
3678 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
3679 }
3680
3681 return error;
3682}
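/* A down-conversion cannot fail on the master, so rather than wait for
   the real reply the sender fakes one above through ls_stub_ms.
   DLM_IFL_STUB_MS marks the stub so the reply paths know not to trust
   fields the stub never filled in. */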
3683
3684/* FIXME: if this lkb is the only lock we hold on the rsb, then set
3685 MASTER_UNCERTAIN to force the next request on the rsb to confirm
3686 that the master is still correct. */
3687
3688static int send_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
3689{
3690 return send_common(r, lkb, DLM_MSG_UNLOCK);
3691}
3692
3693static int send_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
3694{
3695 return send_common(r, lkb, DLM_MSG_CANCEL);
3696}
3697
3698static int send_grant(struct dlm_rsb *r, struct dlm_lkb *lkb)
3699{
3700 struct dlm_message *ms;
3701 struct dlm_mhandle *mh;
3702 int to_nodeid, error;
3703
3704 to_nodeid = lkb->lkb_nodeid;
3705
3706 error = create_message(r, lkb, to_nodeid, DLM_MSG_GRANT, &ms, &mh);
3707 if (error)
3708 goto out;
3709
3710 send_args(r, lkb, ms);
3711
3712 ms->m_result = 0;
3713
3714 error = send_message(mh, ms);
3715 out:
3716 return error;
3717}
3718
3719static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode)
3720{
3721 struct dlm_message *ms;
3722 struct dlm_mhandle *mh;
3723 int to_nodeid, error;
3724
3725 to_nodeid = lkb->lkb_nodeid;
3726
3727 error = create_message(r, NULL, to_nodeid, DLM_MSG_BAST, &ms, &mh);
3728 if (error)
3729 goto out;
3730
3731 send_args(r, lkb, ms);
3732
00e99ccd 3733 ms->m_bastmode = cpu_to_le32(mode);
3734
3735 error = send_message(mh, ms);
3736 out:
3737 return error;
3738}
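/* Unlike the send_common() operations above, grant and bast are
   fire-and-forget: no add_to_waiters() entry is made and the remote
   node never replies (they are the "only two types of async msg"
   handled in _receive_message() below). */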
3739
3740static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
3741{
3742 struct dlm_message *ms;
3743 struct dlm_mhandle *mh;
3744 int to_nodeid, error;
3745
3746 to_nodeid = dlm_dir_nodeid(r);
3747
3748 error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
3749 if (error)
3750 return error;
e7fd4179 3751
3752 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
3753 if (error)
3754 goto fail;
3755
3756 send_args(r, lkb, ms);
3757
3758 error = send_message(mh, ms);
3759 if (error)
3760 goto fail;
3761 return 0;
3762
3763 fail:
ef0c2bb0 3764 remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY);
3765 return error;
3766}
3767
3768static int send_remove(struct dlm_rsb *r)
3769{
3770 struct dlm_message *ms;
3771 struct dlm_mhandle *mh;
3772 int to_nodeid, error;
3773
3774 to_nodeid = dlm_dir_nodeid(r);
3775
3776 error = create_message(r, NULL, to_nodeid, DLM_MSG_REMOVE, &ms, &mh);
3777 if (error)
3778 goto out;
3779
3780 memcpy(ms->m_extra, r->res_name, r->res_length);
00e99ccd 3781 ms->m_hash = cpu_to_le32(r->res_hash);
3782
3783 error = send_message(mh, ms);
3784 out:
3785 return error;
3786}
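/* A remove is addressed to the directory node and carries only the
   resource name and hash; there is no remove reply (see the "remove has
   no reply" note in _receive_message() below), which is why
   send_repeat_remove() exists to re-send one that may have been lost. */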
3787
3788static int send_common_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3789 int mstype, int rv)
3790{
3791 struct dlm_message *ms;
3792 struct dlm_mhandle *mh;
3793 int to_nodeid, error;
3794
3795 to_nodeid = lkb->lkb_nodeid;
3796
3797 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
3798 if (error)
3799 goto out;
3800
3801 send_args(r, lkb, ms);
3802
00e99ccd 3803 ms->m_result = cpu_to_le32(to_dlm_errno(rv));
3804
3805 error = send_message(mh, ms);
3806 out:
3807 return error;
3808}
3809
3810static int send_request_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3811{
3812 return send_common_reply(r, lkb, DLM_MSG_REQUEST_REPLY, rv);
3813}
3814
3815static int send_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3816{
3817 return send_common_reply(r, lkb, DLM_MSG_CONVERT_REPLY, rv);
3818}
3819
3820static int send_unlock_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3821{
3822 return send_common_reply(r, lkb, DLM_MSG_UNLOCK_REPLY, rv);
3823}
3824
3825static int send_cancel_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
3826{
3827 return send_common_reply(r, lkb, DLM_MSG_CANCEL_REPLY, rv);
3828}
3829
3830static int send_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms_in,
3831 int ret_nodeid, int rv)
3832{
3833 struct dlm_rsb *r = &ls->ls_stub_rsb;
3834 struct dlm_message *ms;
3835 struct dlm_mhandle *mh;
3428785a 3836 int error, nodeid = le32_to_cpu(ms_in->m_header.h_nodeid);
3837
3838 error = create_message(r, NULL, nodeid, DLM_MSG_LOOKUP_REPLY, &ms, &mh);
3839 if (error)
3840 goto out;
3841
3842 ms->m_lkid = ms_in->m_lkid;
3843 ms->m_result = cpu_to_le32(to_dlm_errno(rv));
3844 ms->m_nodeid = cpu_to_le32(ret_nodeid);
3845
3846 error = send_message(mh, ms);
3847 out:
3848 return error;
3849}
3850
3851/* which args we save from a received message depends heavily on the type
3852 of message, unlike the send side where we can safely send everything about
3853 the lkb for any type of message */
3854
3855static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
3856{
3857 lkb->lkb_exflags = le32_to_cpu(ms->m_exflags);
3858 lkb->lkb_sbflags = le32_to_cpu(ms->m_sbflags);
e7fd4179 3859 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
00e99ccd 3860 (le32_to_cpu(ms->m_flags) & 0x0000FFFF);
3861}
3862
3863static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3864{
00e99ccd 3865 if (ms->m_flags == cpu_to_le32(DLM_IFL_STUB_MS))
3866 return;
3867
00e99ccd 3868 lkb->lkb_sbflags = le32_to_cpu(ms->m_sbflags);
e7fd4179 3869 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
00e99ccd 3870 (le32_to_cpu(ms->m_flags) & 0x0000FFFF);
3871}
3872
3873static int receive_extralen(struct dlm_message *ms)
3874{
3875 return (le16_to_cpu(ms->m_header.h_length) -
3876 sizeof(struct dlm_message));
3877}
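/* Worked example: for a DLM_MSG_REQUEST carrying the 5-byte name
   "myres", the sender set h_length = sizeof(struct dlm_message) + 5,
   so this returns 5, the number of valid bytes in m_extra[]. */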
3878
3879static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb,
3880 struct dlm_message *ms)
3881{
3882 int len;
3883
3884 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3885 if (!lkb->lkb_lvbptr)
52bda2b5 3886 lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
3887 if (!lkb->lkb_lvbptr)
3888 return -ENOMEM;
3889 len = receive_extralen(ms);
3890 if (len > ls->ls_lvblen)
3891 len = ls->ls_lvblen;
3892 memcpy(lkb->lkb_lvbptr, ms->m_extra, len);
3893 }
3894 return 0;
3895}
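/* Clamping len to ls_lvblen above is a defensive bound: the copy into
   lkb_lvbptr must never exceed the lvb size this lockspace was created
   with, whatever length the remote message claims. */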
3896
3897static void fake_bastfn(void *astparam, int mode)
3898{
3899 log_print("fake_bastfn should not be called");
3900}
3901
3902static void fake_astfn(void *astparam)
3903{
3904 log_print("fake_astfn should not be called");
3905}
3906
3907static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3908 struct dlm_message *ms)
3909{
3428785a 3910 lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
3911 lkb->lkb_ownpid = le32_to_cpu(ms->m_pid);
3912 lkb->lkb_remid = le32_to_cpu(ms->m_lkid);
e7fd4179 3913 lkb->lkb_grmode = DLM_LOCK_IV;
00e99ccd 3914 lkb->lkb_rqmode = le32_to_cpu(ms->m_rqmode);
e5dae548 3915
3916 lkb->lkb_bastfn = (ms->m_asts & cpu_to_le32(DLM_CB_BAST)) ? &fake_bastfn : NULL;
3917 lkb->lkb_astfn = (ms->m_asts & cpu_to_le32(DLM_CB_CAST)) ? &fake_astfn : NULL;
e7fd4179 3918
3919 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3920 /* lkb was just created so there won't be an lvb yet */
52bda2b5 3921 lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
3922 if (!lkb->lkb_lvbptr)
3923 return -ENOMEM;
3924 }
3925
3926 return 0;
3927}
3928
3929static int receive_convert_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3930 struct dlm_message *ms)
3931{
3932 if (lkb->lkb_status != DLM_LKSTS_GRANTED)
3933 return -EBUSY;
3934
3935 if (receive_lvb(ls, lkb, ms))
3936 return -ENOMEM;
3937
3938 lkb->lkb_rqmode = le32_to_cpu(ms->m_rqmode);
3939 lkb->lkb_lvbseq = le32_to_cpu(ms->m_lvbseq);
3940
3941 return 0;
3942}
3943
3944static int receive_unlock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3945 struct dlm_message *ms)
3946{
3947 if (receive_lvb(ls, lkb, ms))
3948 return -ENOMEM;
3949 return 0;
3950}
3951
3952/* We fill in the stub-lkb fields with the info that send_xxxx_reply()
3953 uses to send a reply and that the remote end uses to process the reply. */
3954
3955static void setup_stub_lkb(struct dlm_ls *ls, struct dlm_message *ms)
3956{
3957 struct dlm_lkb *lkb = &ls->ls_stub_lkb;
3428785a 3958 lkb->lkb_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
00e99ccd 3959 lkb->lkb_remid = le32_to_cpu(ms->m_lkid);
3960}
3961
3962/* This is called after the rsb is locked so that we can safely inspect
3963 fields in the lkb. */
3964
3965static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
3966{
3428785a 3967 int from = le32_to_cpu(ms->m_header.h_nodeid);
3968 int error = 0;
3969
6c2e3bf6 3970 /* currently mixing of user/kernel locks is not supported */
3971 if (ms->m_flags & cpu_to_le32(DLM_IFL_USER) &&
3972 ~lkb->lkb_flags & DLM_IFL_USER) {
3973 log_error(lkb->lkb_resource->res_ls,
3974 "got user dlm message for a kernel lock");
3975 error = -EINVAL;
3976 goto out;
3977 }
3978
c54e04b0 3979 switch (ms->m_type) {
3980 case cpu_to_le32(DLM_MSG_CONVERT):
3981 case cpu_to_le32(DLM_MSG_UNLOCK):
3982 case cpu_to_le32(DLM_MSG_CANCEL):
3983 if (!is_master_copy(lkb) || lkb->lkb_nodeid != from)
3984 error = -EINVAL;
3985 break;
3986
3987 case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
3988 case cpu_to_le32(DLM_MSG_UNLOCK_REPLY):
3989 case cpu_to_le32(DLM_MSG_CANCEL_REPLY):
3990 case cpu_to_le32(DLM_MSG_GRANT):
3991 case cpu_to_le32(DLM_MSG_BAST):
3992 if (!is_process_copy(lkb) || lkb->lkb_nodeid != from)
3993 error = -EINVAL;
3994 break;
3995
00e99ccd 3996 case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
3997 if (!is_process_copy(lkb))
3998 error = -EINVAL;
3999 else if (lkb->lkb_nodeid != -1 && lkb->lkb_nodeid != from)
4000 error = -EINVAL;
4001 break;
4002
4003 default:
4004 error = -EINVAL;
4005 }
4006
6c2e3bf6 4007out:
4008 if (error)
4009 log_error(lkb->lkb_resource->res_ls,
4010 "ignore invalid message %d from %d %x %x %x %d",
4011 le32_to_cpu(ms->m_type), from, lkb->lkb_id,
4012 lkb->lkb_remid, lkb->lkb_flags, lkb->lkb_nodeid);
4013 return error;
4014}
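/* In short: operations (convert/unlock/cancel) must target a master
   copy lkb owned by the sending node, while replies, grants and basts
   must target our process copy of a lock mastered by the sender;
   anything else is treated as stale or misrouted and dropped with
   -EINVAL. */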
4015
4016static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len)
4017{
4018 char name[DLM_RESNAME_MAXLEN + 1];
4019 struct dlm_message *ms;
4020 struct dlm_mhandle *mh;
4021 struct dlm_rsb *r;
4022 uint32_t hash, b;
4023 int rv, dir_nodeid;
4024
4025 memset(name, 0, sizeof(name));
4026 memcpy(name, ms_name, len);
4027
4028 hash = jhash(name, len, 0);
4029 b = hash & (ls->ls_rsbtbl_size - 1);
4030
4031 dir_nodeid = dlm_hash2nodeid(ls, hash);
4032
4033 log_error(ls, "send_repeat_remove dir %d %s", dir_nodeid, name);
4034
4035 spin_lock(&ls->ls_rsbtbl[b].lock);
4036 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
4037 if (!rv) {
4038 spin_unlock(&ls->ls_rsbtbl[b].lock);
4039 log_error(ls, "repeat_remove on keep %s", name);
4040 return;
4041 }
4042
4043 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
4044 if (!rv) {
4045 spin_unlock(&ls->ls_rsbtbl[b].lock);
4046 log_error(ls, "repeat_remove on toss %s", name);
4047 return;
4048 }
4049
4050 /* use ls->remove_name2 to avoid conflict with shrink? */
4051
4052 spin_lock(&ls->ls_remove_spin);
4053 ls->ls_remove_len = len;
4054 memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
4055 spin_unlock(&ls->ls_remove_spin);
4056 spin_unlock(&ls->ls_rsbtbl[b].lock);
4057
4058 rv = _create_message(ls, sizeof(struct dlm_message) + len,
4059 dir_nodeid, DLM_MSG_REMOVE, &ms, &mh);
4060 if (rv)
ba589959 4061 goto out;
4062
4063 memcpy(ms->m_extra, name, len);
00e99ccd 4064 ms->m_hash = cpu_to_le32(hash);
4065
4066 send_message(mh, ms);
4067
ba589959 4068out:
4069 spin_lock(&ls->ls_remove_spin);
4070 ls->ls_remove_len = 0;
4071 memset(ls->ls_remove_name, 0, DLM_RESNAME_MAXLEN);
4072 spin_unlock(&ls->ls_remove_spin);
f6f74183 4073 wake_up(&ls->ls_remove_wait);
4074}
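/* Publishing the name in ls_remove_name under ls_remove_spin lets
   other paths working on the same resource see that a remove is in
   flight and wait on ls_remove_wait, which is woken above once the
   message has gone out (the FIXME above notes a possible conflict with
   the shrink code, which appears to use the same fields). */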
4075
6d40c4a7 4076static int receive_request(struct dlm_ls *ls, struct dlm_message *ms)
4077{
4078 struct dlm_lkb *lkb;
4079 struct dlm_rsb *r;
c04fecb4 4080 int from_nodeid;
96006ea6 4081 int error, namelen = 0;
e7fd4179 4082
3428785a 4083 from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
c04fecb4 4084
4085 error = create_lkb(ls, &lkb);
4086 if (error)
4087 goto fail;
4088
4089 receive_flags(lkb, ms);
4090 lkb->lkb_flags |= DLM_IFL_MSTCPY;
4091 error = receive_request_args(ls, lkb, ms);
4092 if (error) {
b3f58d8f 4093 __put_lkb(ls, lkb);
4094 goto fail;
4095 }
4096
4097 /* The dir node is the authority on whether we are the master
4098 for this rsb or not, so if the master sends us a request, we should
4099 recreate the rsb if we've destroyed it. This race happens when we
4100 send a remove message to the dir node at the same time that the dir
4101 node sends us a request for the rsb. */
4102
4103 namelen = receive_extralen(ms);
4104
4105 error = find_rsb(ls, ms->m_extra, namelen, from_nodeid,
4106 R_RECEIVE_REQUEST, &r);
e7fd4179 4107 if (error) {
b3f58d8f 4108 __put_lkb(ls, lkb);
4109 goto fail;
4110 }
4111
4112 lock_rsb(r);
4113
4114 if (r->res_master_nodeid != dlm_our_nodeid()) {
4115 error = validate_master_nodeid(ls, r, from_nodeid);
4116 if (error) {
4117 unlock_rsb(r);
4118 put_rsb(r);
4119 __put_lkb(ls, lkb);
4120 goto fail;
4121 }
4122 }
4123
4124 attach_lkb(r, lkb);
4125 error = do_request(r, lkb);
4126 send_request_reply(r, lkb, error);
cf6620ac 4127 do_request_effects(r, lkb, error);
4128
4129 unlock_rsb(r);
4130 put_rsb(r);
4131
4132 if (error == -EINPROGRESS)
4133 error = 0;
4134 if (error)
b3f58d8f 4135 dlm_put_lkb(lkb);
6d40c4a7 4136 return 0;
4137
4138 fail:
4139 /* TODO: instead of returning ENOTBLK, add the lkb to res_lookup
4140 and do this receive_request again from process_lookup_list once
4141 we get the lookup reply. This would avoid many repeated
4142 ENOTBLK request failures when the lookup reply designating us
4143 as master is delayed. */
4144
4145 /* We could repeatedly return -EBADR here if our send_remove() is
4146 delayed in being sent/arriving/being processed on the dir node.
4147 Another node would repeatedly look up the master, and the dir
4148 node would continue returning our nodeid until our send_remove
4149 took effect.
4150
4151 We send another remove message in case our previous send_remove
4152 was lost/ignored/missed somehow. */
4153
4154 if (error != -ENOTBLK) {
4155 log_limit(ls, "receive_request %x from %d %d",
00e99ccd 4156 le32_to_cpu(ms->m_lkid), from_nodeid, error);
4157 }
4158
4159 if (namelen && error == -EBADR) {
4160 send_repeat_remove(ls, ms->m_extra, namelen);
4161 msleep(1000);
4162 }
4163
4164 setup_stub_lkb(ls, ms);
4165 send_request_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
6d40c4a7 4166 return error;
4167}
4168
6d40c4a7 4169static int receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
4170{
4171 struct dlm_lkb *lkb;
4172 struct dlm_rsb *r;
90135925 4173 int error, reply = 1;
e7fd4179 4174
00e99ccd 4175 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4176 if (error)
4177 goto fail;
4178
00e99ccd 4179 if (lkb->lkb_remid != le32_to_cpu(ms->m_lkid)) {
4180 log_error(ls, "receive_convert %x remid %x recover_seq %llu "
4181 "remote %d %x", lkb->lkb_id, lkb->lkb_remid,
4182 (unsigned long long)lkb->lkb_recover_seq,
4183 le32_to_cpu(ms->m_header.h_nodeid),
4184 le32_to_cpu(ms->m_lkid));
6d40c4a7 4185 error = -ENOENT;
c0174726 4186 dlm_put_lkb(lkb);
4187 goto fail;
4188 }
4189
4190 r = lkb->lkb_resource;
4191
4192 hold_rsb(r);
4193 lock_rsb(r);
4194
4195 error = validate_message(lkb, ms);
4196 if (error)
4197 goto out;
4198
e7fd4179 4199 receive_flags(lkb, ms);
cf6620ac 4200
e7fd4179 4201 error = receive_convert_args(ls, lkb, ms);
4202 if (error) {
4203 send_convert_reply(r, lkb, error);
4204 goto out;
4205 }
4206
4207 reply = !down_conversion(lkb);
4208
4209 error = do_convert(r, lkb);
4210 if (reply)
4211 send_convert_reply(r, lkb, error);
cf6620ac 4212 do_convert_effects(r, lkb, error);
c54e04b0 4213 out:
4214 unlock_rsb(r);
4215 put_rsb(r);
b3f58d8f 4216 dlm_put_lkb(lkb);
6d40c4a7 4217 return 0;
4218
4219 fail:
4220 setup_stub_lkb(ls, ms);
4221 send_convert_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
6d40c4a7 4222 return error;
4223}
4224
6d40c4a7 4225static int receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
4226{
4227 struct dlm_lkb *lkb;
4228 struct dlm_rsb *r;
4229 int error;
4230
00e99ccd 4231 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4232 if (error)
4233 goto fail;
4234
00e99ccd 4235 if (lkb->lkb_remid != le32_to_cpu(ms->m_lkid)) {
4236 log_error(ls, "receive_unlock %x remid %x remote %d %x",
4237 lkb->lkb_id, lkb->lkb_remid,
4238 le32_to_cpu(ms->m_header.h_nodeid),
4239 le32_to_cpu(ms->m_lkid));
6d40c4a7 4240 error = -ENOENT;
c0174726 4241 dlm_put_lkb(lkb);
4242 goto fail;
4243 }
4244
4245 r = lkb->lkb_resource;
4246
4247 hold_rsb(r);
4248 lock_rsb(r);
4249
4250 error = validate_message(lkb, ms);
4251 if (error)
4252 goto out;
4253
e7fd4179 4254 receive_flags(lkb, ms);
cf6620ac 4255
e7fd4179 4256 error = receive_unlock_args(ls, lkb, ms);
4257 if (error) {
4258 send_unlock_reply(r, lkb, error);
4259 goto out;
4260 }
4261
4262 error = do_unlock(r, lkb);
e7fd4179 4263 send_unlock_reply(r, lkb, error);
cf6620ac 4264 do_unlock_effects(r, lkb, error);
c54e04b0 4265 out:
4266 unlock_rsb(r);
4267 put_rsb(r);
b3f58d8f 4268 dlm_put_lkb(lkb);
6d40c4a7 4269 return 0;
4270
4271 fail:
4272 setup_stub_lkb(ls, ms);
4273 send_unlock_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
6d40c4a7 4274 return error;
4275}
4276
6d40c4a7 4277static int receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
4278{
4279 struct dlm_lkb *lkb;
4280 struct dlm_rsb *r;
4281 int error;
4282
00e99ccd 4283 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4284 if (error)
4285 goto fail;
4286
4287 receive_flags(lkb, ms);
4288
4289 r = lkb->lkb_resource;
4290
4291 hold_rsb(r);
4292 lock_rsb(r);
4293
4294 error = validate_message(lkb, ms);
4295 if (error)
4296 goto out;
4297
4298 error = do_cancel(r, lkb);
4299 send_cancel_reply(r, lkb, error);
cf6620ac 4300 do_cancel_effects(r, lkb, error);
c54e04b0 4301 out:
4302 unlock_rsb(r);
4303 put_rsb(r);
b3f58d8f 4304 dlm_put_lkb(lkb);
6d40c4a7 4305 return 0;
4306
4307 fail:
4308 setup_stub_lkb(ls, ms);
4309 send_cancel_reply(&ls->ls_stub_rsb, &ls->ls_stub_lkb, error);
6d40c4a7 4310 return error;
4311}
4312
6d40c4a7 4313static int receive_grant(struct dlm_ls *ls, struct dlm_message *ms)
4314{
4315 struct dlm_lkb *lkb;
4316 struct dlm_rsb *r;
4317 int error;
4318
00e99ccd 4319 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4320 if (error)
4321 return error;
4322
4323 r = lkb->lkb_resource;
4324
4325 hold_rsb(r);
4326 lock_rsb(r);
4327
4328 error = validate_message(lkb, ms);
4329 if (error)
4330 goto out;
4331
e7fd4179 4332 receive_flags_reply(lkb, ms);
4333 if (is_altmode(lkb))
4334 munge_altmode(lkb, ms);
4335 grant_lock_pc(r, lkb, ms);
4336 queue_cast(r, lkb, 0);
c54e04b0 4337 out:
4338 unlock_rsb(r);
4339 put_rsb(r);
b3f58d8f 4340 dlm_put_lkb(lkb);
6d40c4a7 4341 return 0;
4342}
4343
6d40c4a7 4344static int receive_bast(struct dlm_ls *ls, struct dlm_message *ms)
4345{
4346 struct dlm_lkb *lkb;
4347 struct dlm_rsb *r;
4348 int error;
4349
00e99ccd 4350 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4351 if (error)
4352 return error;
4353
4354 r = lkb->lkb_resource;
4355
4356 hold_rsb(r);
4357 lock_rsb(r);
4358
4359 error = validate_message(lkb, ms);
4360 if (error)
4361 goto out;
e7fd4179 4362
4363 queue_bast(r, lkb, le32_to_cpu(ms->m_bastmode));
4364 lkb->lkb_highbast = le32_to_cpu(ms->m_bastmode);
c54e04b0 4365 out:
4366 unlock_rsb(r);
4367 put_rsb(r);
b3f58d8f 4368 dlm_put_lkb(lkb);
6d40c4a7 4369 return 0;
4370}
4371
4372static void receive_lookup(struct dlm_ls *ls, struct dlm_message *ms)
4373{
c04fecb4 4374 int len, error, ret_nodeid, from_nodeid, our_nodeid;
e7fd4179 4375
3428785a 4376 from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
4377 our_nodeid = dlm_our_nodeid();
4378
4379 len = receive_extralen(ms);
4380
4381 error = dlm_master_lookup(ls, from_nodeid, ms->m_extra, len, 0,
4382 &ret_nodeid, NULL);
4383
4384 /* Optimization: we're master so treat lookup as a request */
4385 if (!error && ret_nodeid == our_nodeid) {
4386 receive_request(ls, ms);
4387 return;
4388 }
4389 send_lookup_reply(ls, ms, ret_nodeid, error);
4390}
4391
4392static void receive_remove(struct dlm_ls *ls, struct dlm_message *ms)
4393{
4394 char name[DLM_RESNAME_MAXLEN+1];
4395 struct dlm_rsb *r;
4396 uint32_t hash, b;
4397 int rv, len, dir_nodeid, from_nodeid;
e7fd4179 4398
3428785a 4399 from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
4400
4401 len = receive_extralen(ms);
4402
4403 if (len > DLM_RESNAME_MAXLEN) {
4404 log_error(ls, "receive_remove from %d bad len %d",
4405 from_nodeid, len);
4406 return;
4407 }
4408
00e99ccd 4409 dir_nodeid = dlm_hash2nodeid(ls, le32_to_cpu(ms->m_hash));
e7fd4179 4410 if (dir_nodeid != dlm_our_nodeid()) {
4411 log_error(ls, "receive_remove from %d bad nodeid %d",
4412 from_nodeid, dir_nodeid);
4413 return;
4414 }
4415
4416 /* Look for name on rsbtbl.toss, if it's there, kill it.
4417 If it's on rsbtbl.keep, it's being used, and we should ignore this
4418 message. This is an expected race between the dir node sending a
4419 request to the master node at the same time as the master node sends
4420 a remove to the dir node. The resolution to that race is for the
4421 dir node to ignore the remove message, and the master node to
4422 recreate the master rsb when it gets a request from the dir node for
4423 an rsb it doesn't have. */
4424
4425 memset(name, 0, sizeof(name));
4426 memcpy(name, ms->m_extra, len);
4427
4428 hash = jhash(name, len, 0);
4429 b = hash & (ls->ls_rsbtbl_size - 1);
4430
4431 spin_lock(&ls->ls_rsbtbl[b].lock);
4432
4433 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].toss, name, len, &r);
4434 if (rv) {
4435 /* verify the rsb is on keep list per comment above */
4436 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[b].keep, name, len, &r);
4437 if (rv) {
4438 /* should not happen */
4439 log_error(ls, "receive_remove from %d not found %s",
4440 from_nodeid, name);
4441 spin_unlock(&ls->ls_rsbtbl[b].lock);
4442 return;
4443 }
4444 if (r->res_master_nodeid != from_nodeid) {
4445 /* should not happen */
4446 log_error(ls, "receive_remove keep from %d master %d",
4447 from_nodeid, r->res_master_nodeid);
4448 dlm_print_rsb(r);
4449 spin_unlock(&ls->ls_rsbtbl[b].lock);
4450 return;
4451 }
4452
4453 log_debug(ls, "receive_remove from %d master %d first %x %s",
4454 from_nodeid, r->res_master_nodeid, r->res_first_lkid,
4455 name);
4456 spin_unlock(&ls->ls_rsbtbl[b].lock);
4457 return;
4458 }
4459
4460 if (r->res_master_nodeid != from_nodeid) {
4461 log_error(ls, "receive_remove toss from %d master %d",
4462 from_nodeid, r->res_master_nodeid);
4463 dlm_print_rsb(r);
4464 spin_unlock(&ls->ls_rsbtbl[b].lock);
4465 return;
4466 }
4467
4468 if (kref_put(&r->res_ref, kill_rsb)) {
4469 rb_erase(&r->res_hashnode, &ls->ls_rsbtbl[b].toss);
4470 spin_unlock(&ls->ls_rsbtbl[b].lock);
4471 dlm_free_rsb(r);
4472 } else {
4473 log_error(ls, "receive_remove from %d rsb ref error",
4474 from_nodeid);
4475 dlm_print_rsb(r);
4476 spin_unlock(&ls->ls_rsbtbl[b].lock);
4477 }
4478}
4479
4480static void receive_purge(struct dlm_ls *ls, struct dlm_message *ms)
4481{
00e99ccd 4482 do_purge(ls, le32_to_cpu(ms->m_nodeid), le32_to_cpu(ms->m_pid));
4483}
4484
6d40c4a7 4485static int receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
4486{
4487 struct dlm_lkb *lkb;
4488 struct dlm_rsb *r;
ef0c2bb0 4489 int error, mstype, result;
3428785a 4490 int from_nodeid = le32_to_cpu(ms->m_header.h_nodeid);
e7fd4179 4491
00e99ccd 4492 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4493 if (error)
4494 return error;
e7fd4179 4495
4496 r = lkb->lkb_resource;
4497 hold_rsb(r);
4498 lock_rsb(r);
4499
4500 error = validate_message(lkb, ms);
4501 if (error)
4502 goto out;
4503
4504 mstype = lkb->lkb_wait_type;
4505 error = remove_from_waiters(lkb, DLM_MSG_REQUEST_REPLY);
4506 if (error) {
4507 log_error(ls, "receive_request_reply %x remote %d %x result %d",
4508 lkb->lkb_id, from_nodeid, le32_to_cpu(ms->m_lkid),
4509 from_dlm_errno(le32_to_cpu(ms->m_result)));
4875647a 4510 dlm_dump_rsb(r);
ef0c2bb0 4511 goto out;
4875647a 4512 }
ef0c2bb0 4513
4514 /* Optimization: the dir node was also the master, so it took our
4515 lookup as a request and sent request reply instead of lookup reply */
4516 if (mstype == DLM_MSG_LOOKUP) {
4517 r->res_master_nodeid = from_nodeid;
4518 r->res_nodeid = from_nodeid;
4519 lkb->lkb_nodeid = from_nodeid;
4520 }
4521
ef0c2bb0 4522 /* this is the value returned from do_request() on the master */
00e99ccd 4523 result = from_dlm_errno(le32_to_cpu(ms->m_result));
4524
4525 switch (result) {
e7fd4179 4526 case -EAGAIN:
ef0c2bb0 4527 /* request would block (be queued) on remote master */
4528 queue_cast(r, lkb, -EAGAIN);
4529 confirm_master(r, -EAGAIN);
ef0c2bb0 4530 unhold_lkb(lkb); /* undoes create_lkb() */
4531 break;
4532
4533 case -EINPROGRESS:
4534 case 0:
4535 /* request was queued or granted on remote master */
4536 receive_flags_reply(lkb, ms);
00e99ccd 4537 lkb->lkb_remid = le32_to_cpu(ms->m_lkid);
4538 if (is_altmode(lkb))
4539 munge_altmode(lkb, ms);
3ae1acf9 4540 if (result) {
e7fd4179 4541 add_lkb(r, lkb, DLM_LKSTS_WAITING);
4542 add_timeout(lkb);
4543 } else {
4544 grant_lock_pc(r, lkb, ms);
4545 queue_cast(r, lkb, 0);
4546 }
ef0c2bb0 4547 confirm_master(r, result);
4548 break;
4549
597d0cae 4550 case -EBADR:
4551 case -ENOTBLK:
4552 /* find_rsb failed to find rsb or rsb wasn't master */
4553 log_limit(ls, "receive_request_reply %x from %d %d "
4554 "master %d dir %d first %x %s", lkb->lkb_id,
4555 from_nodeid, result, r->res_master_nodeid,
4556 r->res_dir_nodeid, r->res_first_lkid, r->res_name);
4557
4558 if (r->res_dir_nodeid != dlm_our_nodeid() &&
4559 r->res_master_nodeid != dlm_our_nodeid()) {
4560 /* cause _request_lock->set_master->send_lookup */
4561 r->res_master_nodeid = 0;
4562 r->res_nodeid = -1;
4563 lkb->lkb_nodeid = -1;
4564 }
4565
4566 if (is_overlap(lkb)) {
4567 /* we'll ignore error in cancel/unlock reply */
4568 queue_cast_overlap(r, lkb);
aec64e1b 4569 confirm_master(r, result);
ef0c2bb0 4570 unhold_lkb(lkb); /* undoes create_lkb() */
c04fecb4 4571 } else {
ef0c2bb0 4572 _request_lock(r, lkb);
4573
4574 if (r->res_master_nodeid == dlm_our_nodeid())
4575 confirm_master(r, 0);
4576 }
4577 break;
4578
4579 default:
4580 log_error(ls, "receive_request_reply %x error %d",
4581 lkb->lkb_id, result);
4582 }
4583
4584 if (is_overlap_unlock(lkb) && (result == 0 || result == -EINPROGRESS)) {
4585 log_debug(ls, "receive_request_reply %x result %d unlock",
4586 lkb->lkb_id, result);
4587 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
4588 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
4589 send_unlock(r, lkb);
4590 } else if (is_overlap_cancel(lkb) && (result == -EINPROGRESS)) {
4591 log_debug(ls, "receive_request_reply %x cancel", lkb->lkb_id);
4592 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
4593 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
4594 send_cancel(r, lkb);
4595 } else {
4596 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
4597 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
4598 }
4599 out:
4600 unlock_rsb(r);
4601 put_rsb(r);
b3f58d8f 4602 dlm_put_lkb(lkb);
6d40c4a7 4603 return 0;
4604}
4605
4606static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
4607 struct dlm_message *ms)
4608{
e7fd4179 4609 /* this is the value returned from do_convert() on the master */
00e99ccd 4610 switch (from_dlm_errno(le32_to_cpu(ms->m_result))) {
4611 case -EAGAIN:
4612 /* convert would block (be queued) on remote master */
4613 queue_cast(r, lkb, -EAGAIN);
4614 break;
4615
4616 case -EDEADLK:
4617 receive_flags_reply(lkb, ms);
4618 revert_lock_pc(r, lkb);
4619 queue_cast(r, lkb, -EDEADLK);
4620 break;
4621
4622 case -EINPROGRESS:
4623 /* convert was queued on remote master */
4624 receive_flags_reply(lkb, ms);
4625 if (is_demoted(lkb))
2a7ce0ed 4626 munge_demoted(lkb);
4627 del_lkb(r, lkb);
4628 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3ae1acf9 4629 add_timeout(lkb);
4630 break;
4631
4632 case 0:
4633 /* convert was granted on remote master */
4634 receive_flags_reply(lkb, ms);
7d3c1feb 4635 if (is_demoted(lkb))
2a7ce0ed 4636 munge_demoted(lkb);
4637 grant_lock_pc(r, lkb, ms);
4638 queue_cast(r, lkb, 0);
4639 break;
4640
4641 default:
6d40c4a7 4642 log_error(r->res_ls, "receive_convert_reply %x remote %d %x %d",
3428785a 4643 lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid),
4644 le32_to_cpu(ms->m_lkid),
4645 from_dlm_errno(le32_to_cpu(ms->m_result)));
4646 dlm_print_rsb(r);
4647 dlm_print_lkb(lkb);
4648 }
4649}
4650
4651static void _receive_convert_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
4652{
4653 struct dlm_rsb *r = lkb->lkb_resource;
ef0c2bb0 4654 int error;
4655
4656 hold_rsb(r);
4657 lock_rsb(r);
4658
4659 error = validate_message(lkb, ms);
4660 if (error)
4661 goto out;
4662
4663 /* stub reply can happen with waiters_mutex held */
4664 error = remove_from_waiters_ms(lkb, ms);
4665 if (error)
4666 goto out;
e7fd4179 4667
4668 __receive_convert_reply(r, lkb, ms);
4669 out:
4670 unlock_rsb(r);
4671 put_rsb(r);
4672}
4673
6d40c4a7 4674static int receive_convert_reply(struct dlm_ls *ls, struct dlm_message *ms)
4675{
4676 struct dlm_lkb *lkb;
4677 int error;
4678
00e99ccd 4679 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4680 if (error)
4681 return error;
e7fd4179 4682
e7fd4179 4683 _receive_convert_reply(lkb, ms);
b3f58d8f 4684 dlm_put_lkb(lkb);
6d40c4a7 4685 return 0;
4686}
4687
4688static void _receive_unlock_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
4689{
4690 struct dlm_rsb *r = lkb->lkb_resource;
ef0c2bb0 4691 int error;
4692
4693 hold_rsb(r);
4694 lock_rsb(r);
4695
4696 error = validate_message(lkb, ms);
4697 if (error)
4698 goto out;
4699
4700 /* stub reply can happen with waiters_mutex held */
4701 error = remove_from_waiters_ms(lkb, ms);
4702 if (error)
4703 goto out;
4704
4705 /* this is the value returned from do_unlock() on the master */
4706
00e99ccd 4707 switch (from_dlm_errno(le32_to_cpu(ms->m_result))) {
4708 case -DLM_EUNLOCK:
4709 receive_flags_reply(lkb, ms);
4710 remove_lock_pc(r, lkb);
4711 queue_cast(r, lkb, -DLM_EUNLOCK);
4712 break;
4713 case -ENOENT:
4714 break;
e7fd4179 4715 default:
ef0c2bb0 4716 log_error(r->res_ls, "receive_unlock_reply %x error %d",
00e99ccd 4717 lkb->lkb_id, from_dlm_errno(le32_to_cpu(ms->m_result)));
e7fd4179 4718 }
ef0c2bb0 4719 out:
4720 unlock_rsb(r);
4721 put_rsb(r);
4722}
4723
6d40c4a7 4724static int receive_unlock_reply(struct dlm_ls *ls, struct dlm_message *ms)
4725{
4726 struct dlm_lkb *lkb;
4727 int error;
4728
00e99ccd 4729 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4730 if (error)
4731 return error;
e7fd4179 4732
e7fd4179 4733 _receive_unlock_reply(lkb, ms);
b3f58d8f 4734 dlm_put_lkb(lkb);
6d40c4a7 4735 return 0;
4736}
4737
4738static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
4739{
4740 struct dlm_rsb *r = lkb->lkb_resource;
ef0c2bb0 4741 int error;
4742
4743 hold_rsb(r);
4744 lock_rsb(r);
4745
4746 error = validate_message(lkb, ms);
4747 if (error)
4748 goto out;
4749
4750 /* stub reply can happen with waiters_mutex held */
4751 error = remove_from_waiters_ms(lkb, ms);
4752 if (error)
4753 goto out;
4754
4755 /* this is the value returned from do_cancel() on the master */
4756
00e99ccd 4757 switch (from_dlm_errno(le32_to_cpu(ms->m_result))) {
4758 case -DLM_ECANCEL:
4759 receive_flags_reply(lkb, ms);
4760 revert_lock_pc(r, lkb);
84d8cd69 4761 queue_cast(r, lkb, -DLM_ECANCEL);
4762 break;
4763 case 0:
4764 break;
4765 default:
ef0c2bb0 4766 log_error(r->res_ls, "receive_cancel_reply %x error %d",
4767 lkb->lkb_id,
4768 from_dlm_errno(le32_to_cpu(ms->m_result)));
e7fd4179 4769 }
ef0c2bb0 4770 out:
4771 unlock_rsb(r);
4772 put_rsb(r);
4773}
4774
6d40c4a7 4775static int receive_cancel_reply(struct dlm_ls *ls, struct dlm_message *ms)
4776{
4777 struct dlm_lkb *lkb;
4778 int error;
4779
00e99ccd 4780 error = find_lkb(ls, le32_to_cpu(ms->m_remid), &lkb);
4781 if (error)
4782 return error;
e7fd4179 4783
e7fd4179 4784 _receive_cancel_reply(lkb, ms);
b3f58d8f 4785 dlm_put_lkb(lkb);
6d40c4a7 4786 return 0;
4787}
4788
4789static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms)
4790{
4791 struct dlm_lkb *lkb;
4792 struct dlm_rsb *r;
4793 int error, ret_nodeid;
c04fecb4 4794 int do_lookup_list = 0;
e7fd4179 4795
00e99ccd 4796 error = find_lkb(ls, le32_to_cpu(ms->m_lkid), &lkb);
e7fd4179 4797 if (error) {
4798 log_error(ls, "%s no lkid %x", __func__,
4799 le32_to_cpu(ms->m_lkid));
4800 return;
4801 }
4802
c04fecb4 4803 /* ms->m_result is the value returned by dlm_master_lookup on dir node
e7fd4179 4804 FIXME: will a non-zero error ever be returned? */
4805
4806 r = lkb->lkb_resource;
4807 hold_rsb(r);
4808 lock_rsb(r);
4809
4810 error = remove_from_waiters(lkb, DLM_MSG_LOOKUP_REPLY);
4811 if (error)
4812 goto out;
4813
00e99ccd 4814 ret_nodeid = le32_to_cpu(ms->m_nodeid);
4815
4816 /* We sometimes receive a request from the dir node for this
4817 rsb before we've received the dir node's lookup_reply for it.
4818 The request from the dir node implies we're the master, so we set
4819 ourself as master in receive_request_reply, and verify here that
4820 we are indeed the master. */
4821
4822 if (r->res_master_nodeid && (r->res_master_nodeid != ret_nodeid)) {
4823 /* This should never happen */
4824 log_error(ls, "receive_lookup_reply %x from %d ret %d "
4825 "master %d dir %d our %d first %x %s",
4826 lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid),
4827 ret_nodeid, r->res_master_nodeid, r->res_dir_nodeid,
4828 dlm_our_nodeid(), r->res_first_lkid, r->res_name);
4829 }
4830
e7fd4179 4831 if (ret_nodeid == dlm_our_nodeid()) {
c04fecb4 4832 r->res_master_nodeid = ret_nodeid;
e7fd4179 4833 r->res_nodeid = 0;
c04fecb4 4834 do_lookup_list = 1;
e7fd4179 4835 r->res_first_lkid = 0;
4836 } else if (ret_nodeid == -1) {
4837 /* the remote node doesn't believe it's the dir node */
4838 log_error(ls, "receive_lookup_reply %x from %d bad ret_nodeid",
3428785a 4839 lkb->lkb_id, le32_to_cpu(ms->m_header.h_nodeid));
4840 r->res_master_nodeid = 0;
4841 r->res_nodeid = -1;
4842 lkb->lkb_nodeid = -1;
e7fd4179 4843 } else {
4844 /* set_master() will set lkb_nodeid from r */
4845 r->res_master_nodeid = ret_nodeid;
4846 r->res_nodeid = ret_nodeid;
4847 }
4848
4849 if (is_overlap(lkb)) {
4850 log_debug(ls, "receive_lookup_reply %x unlock %x",
4851 lkb->lkb_id, lkb->lkb_flags);
4852 queue_cast_overlap(r, lkb);
4853 unhold_lkb(lkb); /* undoes create_lkb() */
4854 goto out_list;
4855 }
4856
4857 _request_lock(r, lkb);
4858
ef0c2bb0 4859 out_list:
c04fecb4 4860 if (do_lookup_list)
e7fd4179 4861 process_lookup_list(r);
ef0c2bb0 4862 out:
4863 unlock_rsb(r);
4864 put_rsb(r);
b3f58d8f 4865 dlm_put_lkb(lkb);
4866}
4867
4868static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms,
4869 uint32_t saved_seq)
e7fd4179 4870{
4871 int error = 0, noent = 0;
4872
3428785a 4873 if (!dlm_is_member(ls, le32_to_cpu(ms->m_header.h_nodeid))) {
c04fecb4 4874 log_limit(ls, "receive %d from non-member %d %x %x %d",
4875 le32_to_cpu(ms->m_type),
4876 le32_to_cpu(ms->m_header.h_nodeid),
4877 le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid),
4878 from_dlm_errno(le32_to_cpu(ms->m_result)));
4879 return;
4880 }
4881
4882 switch (ms->m_type) {
4883
4884 /* messages sent to a master node */
4885
00e99ccd 4886 case cpu_to_le32(DLM_MSG_REQUEST):
6d40c4a7 4887 error = receive_request(ls, ms);
4888 break;
4889
00e99ccd 4890 case cpu_to_le32(DLM_MSG_CONVERT):
6d40c4a7 4891 error = receive_convert(ls, ms);
4892 break;
4893
00e99ccd 4894 case cpu_to_le32(DLM_MSG_UNLOCK):
6d40c4a7 4895 error = receive_unlock(ls, ms);
4896 break;
4897
00e99ccd 4898 case cpu_to_le32(DLM_MSG_CANCEL):
4899 noent = 1;
4900 error = receive_cancel(ls, ms);
4901 break;
4902
4903 /* messages sent from a master node (replies to above) */
4904
00e99ccd 4905 case cpu_to_le32(DLM_MSG_REQUEST_REPLY):
6d40c4a7 4906 error = receive_request_reply(ls, ms);
4907 break;
4908
00e99ccd 4909 case cpu_to_le32(DLM_MSG_CONVERT_REPLY):
6d40c4a7 4910 error = receive_convert_reply(ls, ms);
4911 break;
4912
00e99ccd 4913 case cpu_to_le32(DLM_MSG_UNLOCK_REPLY):
6d40c4a7 4914 error = receive_unlock_reply(ls, ms);
4915 break;
4916
00e99ccd 4917 case cpu_to_le32(DLM_MSG_CANCEL_REPLY):
6d40c4a7 4918 error = receive_cancel_reply(ls, ms);
4919 break;
4920
4921 /* messages sent from a master node (only two types of async msg) */
4922
00e99ccd 4923 case cpu_to_le32(DLM_MSG_GRANT):
4924 noent = 1;
4925 error = receive_grant(ls, ms);
4926 break;
4927
00e99ccd 4928 case cpu_to_le32(DLM_MSG_BAST):
4929 noent = 1;
4930 error = receive_bast(ls, ms);
4931 break;
4932
4933 /* messages sent to a dir node */
4934
00e99ccd 4935 case cpu_to_le32(DLM_MSG_LOOKUP):
4936 receive_lookup(ls, ms);
4937 break;
4938
00e99ccd 4939 case cpu_to_le32(DLM_MSG_REMOVE):
4940 receive_remove(ls, ms);
4941 break;
4942
4943 /* messages sent from a dir node (remove has no reply) */
4944
00e99ccd 4945 case cpu_to_le32(DLM_MSG_LOOKUP_REPLY):
4946 receive_lookup_reply(ls, ms);
4947 break;
4948
4949 /* other messages */
4950
00e99ccd 4951 case cpu_to_le32(DLM_MSG_PURGE):
4952 receive_purge(ls, ms);
4953 break;
4954
e7fd4179 4955 default:
4956 log_error(ls, "unknown message type %d",
4957 le32_to_cpu(ms->m_type));
e7fd4179 4958 }
4959
4960 /*
4961 * When checking for ENOENT, we're checking the result of
4962 * find_lkb(m_remid):
4963 *
4964 * The lock id referenced in the message wasn't found. This may
4965 * happen in normal usage for the async messages and cancel, so
4966 * only use log_debug for them.
4967 *
4875647a 4968 * Some errors are expected and normal.
4969 */
4970
4971 if (error == -ENOENT && noent) {
4875647a 4972 log_debug(ls, "receive %d no %x remote %d %x saved_seq %u",
00e99ccd 4973 le32_to_cpu(ms->m_type), le32_to_cpu(ms->m_remid),
3428785a 4974 le32_to_cpu(ms->m_header.h_nodeid),
00e99ccd 4975 le32_to_cpu(ms->m_lkid), saved_seq);
6d40c4a7 4976 } else if (error == -ENOENT) {
4875647a 4977 log_error(ls, "receive %d no %x remote %d %x saved_seq %u",
00e99ccd 4978 le32_to_cpu(ms->m_type), le32_to_cpu(ms->m_remid),
3428785a 4979 le32_to_cpu(ms->m_header.h_nodeid),
00e99ccd 4980 le32_to_cpu(ms->m_lkid), saved_seq);
6d40c4a7 4981
4982 if (ms->m_type == cpu_to_le32(DLM_MSG_CONVERT))
4983 dlm_dump_rsb_hash(ls, le32_to_cpu(ms->m_hash));
6d40c4a7 4984 }
4985
4986 if (error == -EINVAL) {
4987 log_error(ls, "receive %d inval from %d lkid %x remid %x "
4988 "saved_seq %u",
4989 le32_to_cpu(ms->m_type),
4990 le32_to_cpu(ms->m_header.h_nodeid),
4991 le32_to_cpu(ms->m_lkid), le32_to_cpu(ms->m_remid),
4992 saved_seq);
4875647a 4993 }
4994}
4995
4996/* If the lockspace is in recovery mode (locking stopped), then normal
4997 messages are saved on the requestqueue for processing after recovery is
4998 done. When not in recovery mode, we wait for dlm_recoverd to drain saved
4999 messages off the requestqueue before we process new ones. This occurs right
5000 after recovery completes when we transition from saving all messages on
5001 requestqueue, to processing all the saved messages, to processing new
5002 messages as they arrive. */
e7fd4179 5003
5004static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms,
5005 int nodeid)
5006{
5007 if (dlm_locking_stopped(ls)) {
5008 /* If we were a member of this lockspace, left, and rejoined,
5009 other nodes may still be sending us messages from the
5010 lockspace generation before we left. */
5011 if (!ls->ls_generation) {
5012 log_limit(ls, "receive %d from %d ignore old gen",
00e99ccd 5013 le32_to_cpu(ms->m_type), nodeid);
5014 return;
5015 }
5016
8b0d8e03 5017 dlm_add_requestqueue(ls, nodeid, ms);
5018 } else {
5019 dlm_wait_requestqueue(ls);
6d40c4a7 5020 _receive_message(ls, ms, 0);
5021 }
5022}
5023
5024/* This is called by dlm_recoverd to process messages that were saved on
5025 the requestqueue. */
5026
5027void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms,
5028 uint32_t saved_seq)
c36258b5 5029{
6d40c4a7 5030 _receive_message(ls, ms, saved_seq);
5031}
5032
5033/* This is called by the midcomms layer when something is received for
5034 the lockspace. It could be either a MSG (normal message sent as part of
5035 standard locking activity) or an RCOM (recovery message sent as part of
5036 lockspace recovery). */
5037
eef7d739 5038void dlm_receive_buffer(union dlm_packet *p, int nodeid)
c36258b5 5039{
eef7d739 5040 struct dlm_header *hd = &p->header;
5041 struct dlm_ls *ls;
5042 int type = 0;
5043
5044 switch (hd->h_cmd) {
5045 case DLM_MSG:
00e99ccd 5046 type = le32_to_cpu(p->message.m_type);
5047 break;
5048 case DLM_RCOM:
2f9dbeda 5049 type = le32_to_cpu(p->rcom.rc_type);
5050 break;
5051 default:
5052 log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid);
5053 return;
5054 }
5055
3428785a 5056 if (le32_to_cpu(hd->h_nodeid) != nodeid) {
c36258b5 5057 log_print("invalid h_nodeid %d from %d lockspace %x",
5058 le32_to_cpu(hd->h_nodeid), nodeid,
5059 le32_to_cpu(hd->u.h_lockspace));
5060 return;
5061 }
5062
3428785a 5063 ls = dlm_find_lockspace_global(le32_to_cpu(hd->u.h_lockspace));
c36258b5 5064 if (!ls) {
5065 if (dlm_config.ci_log_debug) {
5066 printk_ratelimited(KERN_DEBUG "dlm: invalid lockspace "
5067 "%u from %d cmd %d type %d\n",
5068 le32_to_cpu(hd->u.h_lockspace), nodeid,
5069 hd->h_cmd, type);
4875647a 5070 }
5071
5072 if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS)
eef7d739 5073 dlm_send_ls_not_ready(nodeid, &p->rcom);
5074 return;
5075 }
5076
5077 /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to
5078 be inactive (in this ls) before transitioning to recovery mode */
5079
5080 down_read(&ls->ls_recv_active);
5081 if (hd->h_cmd == DLM_MSG)
eef7d739 5082 dlm_receive_message(ls, &p->message, nodeid);
c36258b5 5083 else
eef7d739 5084 dlm_receive_rcom(ls, &p->rcom, nodeid);
5085 up_read(&ls->ls_recv_active);
5086
5087 dlm_put_lockspace(ls);
5088}
e7fd4179 5089
5090static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
5091 struct dlm_message *ms_stub)
5092{
5093 if (middle_conversion(lkb)) {
5094 hold_lkb(lkb);
2a7ce0ed 5095 memset(ms_stub, 0, sizeof(struct dlm_message));
5096 ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
5097 ms_stub->m_type = cpu_to_le32(DLM_MSG_CONVERT_REPLY);
5098 ms_stub->m_result = cpu_to_le32(to_dlm_errno(-EINPROGRESS));
3428785a 5099 ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
2a7ce0ed 5100 _receive_convert_reply(lkb, ms_stub);
5101
5102 /* Same special case as in receive_rcom_lock_args() */
5103 lkb->lkb_grmode = DLM_LOCK_IV;
5104 rsb_set_flag(lkb->lkb_resource, RSB_RECOVER_CONVERT);
5105 unhold_lkb(lkb);
5106
5107 } else if (lkb->lkb_rqmode >= lkb->lkb_grmode) {
5108 lkb->lkb_flags |= DLM_IFL_RESEND;
5109 }
5110
5111 /* lkb->lkb_rqmode < lkb->lkb_grmode shouldn't happen since down
5112 conversions are async; there's no reply from the remote master */
5113}
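/* The "middle" conversions are the PR <-> CW cases (see
   middle_conversion()): those two modes are incomparable, so the op is
   neither a pure up- nor down-conversion and its in-flight state cannot
   simply be replayed; the lock is reset to IV with RSB_RECOVER_CONVERT
   set so recovery can rebuild it. */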
5114
5115/* A waiting lkb needs recovery if the master node has failed, or
5116 the master node is changing (only when no directory is used) */
5117
5118static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb,
5119 int dir_nodeid)
e7fd4179 5120{
4875647a 5121 if (dlm_no_directory(ls))
5122 return 1;
5123
4875647a 5124 if (dlm_is_removed(ls, lkb->lkb_wait_nodeid))
5125 return 1;
5126
5127 return 0;
5128}
5129
5130/* Recovery for locks that are waiting for replies from nodes that are now
5131 gone. We can just complete unlocks and cancels by faking a reply from the
5132 dead node. Requests and up-conversions we flag to be resent after
5133 recovery. Down-conversions can just be completed with a fake reply like
5134 unlocks. Conversions between PR and CW need special attention. */
5135
5136void dlm_recover_waiters_pre(struct dlm_ls *ls)
5137{
5138 struct dlm_lkb *lkb, *safe;
2a7ce0ed 5139 struct dlm_message *ms_stub;
601342ce 5140 int wait_type, stub_unlock_result, stub_cancel_result;
13ef1111 5141 int dir_nodeid;
e7fd4179 5142
102e67d4 5143 ms_stub = kmalloc(sizeof(*ms_stub), GFP_KERNEL);
0d37eca7 5144 if (!ms_stub)
2a7ce0ed 5145 return;
2a7ce0ed 5146
90135925 5147 mutex_lock(&ls->ls_waiters_mutex);
5148
5149 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
2a7ce0ed 5150
5151 dir_nodeid = dlm_dir_nodeid(lkb->lkb_resource);
5152
5153 /* exclude debug messages about unlocks because there can be so
5154 many and they aren't very interesting */
5155
5156 if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
5157 log_debug(ls, "waiter %x remote %x msg %d r_nodeid %d "
5158 "lkb_nodeid %d wait_nodeid %d dir_nodeid %d",
5159 lkb->lkb_id,
5160 lkb->lkb_remid,
5161 lkb->lkb_wait_type,
5162 lkb->lkb_resource->res_nodeid,
5163 lkb->lkb_nodeid,
5164 lkb->lkb_wait_nodeid,
5165 dir_nodeid);
2a7ce0ed 5166 }
5167
5168 /* all outstanding lookups, regardless of destination will be
5169 resent after recovery is done */
5170
5171 if (lkb->lkb_wait_type == DLM_MSG_LOOKUP) {
5172 lkb->lkb_flags |= DLM_IFL_RESEND;
5173 continue;
5174 }
5175
13ef1111 5176 if (!waiter_needs_recovery(ls, lkb, dir_nodeid))
5177 continue;
5178
5179 wait_type = lkb->lkb_wait_type;
5180 stub_unlock_result = -DLM_EUNLOCK;
5181 stub_cancel_result = -DLM_ECANCEL;
5182
5183 /* Main reply may have been received leaving a zero wait_type,
5184 but a reply for the overlapping op may not have been
5185 received. In that case we need to fake the appropriate
5186 reply for the overlap op. */
5187
5188 if (!wait_type) {
5189 if (is_overlap_cancel(lkb)) {
5190 wait_type = DLM_MSG_CANCEL;
5191 if (lkb->lkb_grmode == DLM_LOCK_IV)
5192 stub_cancel_result = 0;
5193 }
5194 if (is_overlap_unlock(lkb)) {
5195 wait_type = DLM_MSG_UNLOCK;
5196 if (lkb->lkb_grmode == DLM_LOCK_IV)
5197 stub_unlock_result = -ENOENT;
5198 }
5199
5200 log_debug(ls, "rwpre overlap %x %x %d %d %d",
5201 lkb->lkb_id, lkb->lkb_flags, wait_type,
5202 stub_cancel_result, stub_unlock_result);
5203 }
5204
5205 switch (wait_type) {
5206
5207 case DLM_MSG_REQUEST:
5208 lkb->lkb_flags |= DLM_IFL_RESEND;
5209 break;
5210
5211 case DLM_MSG_CONVERT:
2a7ce0ed 5212 recover_convert_waiter(ls, lkb, ms_stub);
5213 break;
5214
5215 case DLM_MSG_UNLOCK:
5216 hold_lkb(lkb);
2a7ce0ed 5217 memset(ms_stub, 0, sizeof(struct dlm_message));
5218 ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
5219 ms_stub->m_type = cpu_to_le32(DLM_MSG_UNLOCK_REPLY);
5220 ms_stub->m_result = cpu_to_le32(to_dlm_errno(stub_unlock_result));
3428785a 5221 ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
2a7ce0ed 5222 _receive_unlock_reply(lkb, ms_stub);
b3f58d8f 5223 dlm_put_lkb(lkb);
5224 break;
5225
5226 case DLM_MSG_CANCEL:
5227 hold_lkb(lkb);
2a7ce0ed 5228 memset(ms_stub, 0, sizeof(struct dlm_message));
5229 ms_stub->m_flags = cpu_to_le32(DLM_IFL_STUB_MS);
5230 ms_stub->m_type = cpu_to_le32(DLM_MSG_CANCEL_REPLY);
5231 ms_stub->m_result = cpu_to_le32(to_dlm_errno(stub_cancel_result));
3428785a 5232 ms_stub->m_header.h_nodeid = cpu_to_le32(lkb->lkb_nodeid);
2a7ce0ed 5233 _receive_cancel_reply(lkb, ms_stub);
b3f58d8f 5234 dlm_put_lkb(lkb);
5235 break;
5236
5237 default:
5238 log_error(ls, "invalid lkb wait_type %d %d",
5239 lkb->lkb_wait_type, wait_type);
e7fd4179 5240 }
81456807 5241 schedule();
e7fd4179 5242 }
90135925 5243 mutex_unlock(&ls->ls_waiters_mutex);
2a7ce0ed 5244 kfree(ms_stub);
5245}
5246
ef0c2bb0 5247static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
e7fd4179 5248{
dc1acd5c 5249 struct dlm_lkb *lkb = NULL, *iter;
e7fd4179 5250
90135925 5251 mutex_lock(&ls->ls_waiters_mutex);
5252 list_for_each_entry(iter, &ls->ls_waiters, lkb_wait_reply) {
5253 if (iter->lkb_flags & DLM_IFL_RESEND) {
5254 hold_lkb(iter);
5255 lkb = iter;
e7fd4179
DT
5256 break;
5257 }
5258 }
90135925 5259 mutex_unlock(&ls->ls_waiters_mutex);
e7fd4179 5260
ef0c2bb0 5261 return lkb;
5262}
5263
5264/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
5265 master or dir-node for r. Processing the lkb may result in it being placed
5266 back on waiters. */
5267
5268/* We do this after normal locking has been enabled and any saved messages
5269 (in requestqueue) have been processed. We should be confident that at
5270 this point we won't get or process a reply to any of these waiting
5271 operations. But, new ops may be coming in on the rsbs/locks here from
5272 userspace or remotely. */
5273
5274/* there may have been an overlap unlock/cancel prior to recovery or after
5275 recovery. if before, the lkb may still have a positive wait_count; if after,
5276 the overlap flag would just have been set and nothing new sent. we can be
5277 confident here that any replies to either the initial op or overlap ops
5278 prior to recovery have been received. */
5279
5280int dlm_recover_waiters_post(struct dlm_ls *ls)
5281{
5282 struct dlm_lkb *lkb;
5283 struct dlm_rsb *r;
ef0c2bb0 5284 int error = 0, mstype, err, oc, ou;
5285
5286 while (1) {
5287 if (dlm_locking_stopped(ls)) {
5288 log_debug(ls, "recover_waiters_post aborted");
5289 error = -EINTR;
5290 break;
5291 }
5292
5293 lkb = find_resend_waiter(ls);
5294 if (!lkb)
5295 break;
5296
5297 r = lkb->lkb_resource;
5298 hold_rsb(r);
5299 lock_rsb(r);
5300
5301 mstype = lkb->lkb_wait_type;
5302 oc = is_overlap_cancel(lkb);
5303 ou = is_overlap_unlock(lkb);
5304 err = 0;
e7fd4179 5305
5306 log_debug(ls, "waiter %x remote %x msg %d r_nodeid %d "
5307 "lkb_nodeid %d wait_nodeid %d dir_nodeid %d "
5308 "overlap %d %d", lkb->lkb_id, lkb->lkb_remid, mstype,
5309 r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid,
5310 dlm_dir_nodeid(r), oc, ou);
e7fd4179 5311
5312 /* At this point we assume that we won't get a reply to any
5313 previous op or overlap op on this lock. First, do a big
5314 remove_from_waiters() for all previous ops. */
5315
5316 lkb->lkb_flags &= ~DLM_IFL_RESEND;
5317 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_UNLOCK;
5318 lkb->lkb_flags &= ~DLM_IFL_OVERLAP_CANCEL;
5319 lkb->lkb_wait_type = 0;
5320 /* drop all wait_count references; we still
5321 * hold a reference for this iteration.
5322 */
5323 while (lkb->lkb_wait_count) {
5324 lkb->lkb_wait_count--;
5325 unhold_lkb(lkb);
5326 }
5327 mutex_lock(&ls->ls_waiters_mutex);
5328 list_del_init(&lkb->lkb_wait_reply);
5329 mutex_unlock(&ls->ls_waiters_mutex);
5330
5331 if (oc || ou) {
5332 /* do an unlock or cancel instead of resending */
5333 switch (mstype) {
5334 case DLM_MSG_LOOKUP:
5335 case DLM_MSG_REQUEST:
5336 queue_cast(r, lkb, ou ? -DLM_EUNLOCK :
5337 -DLM_ECANCEL);
5338 unhold_lkb(lkb); /* undoes create_lkb() */
5339 break;
5340 case DLM_MSG_CONVERT:
5341 if (oc) {
5342 queue_cast(r, lkb, -DLM_ECANCEL);
5343 } else {
5344 lkb->lkb_exflags |= DLM_LKF_FORCEUNLOCK;
5345 _unlock_lock(r, lkb);
5346 }
5347 break;
5348 default:
5349 err = 1;
5350 }
5351 } else {
5352 switch (mstype) {
5353 case DLM_MSG_LOOKUP:
5354 case DLM_MSG_REQUEST:
5355 _request_lock(r, lkb);
5356 if (is_master(r))
5357 confirm_master(r, 0);
5358 break;
5359 case DLM_MSG_CONVERT:
5360 _convert_lock(r, lkb);
5361 break;
5362 default:
5363 err = 1;
5364 }
e7fd4179 5365 }
ef0c2bb0 5366
13ef1111
DT
5367 if (err) {
5368 log_error(ls, "waiter %x msg %d r_nodeid %d "
5369 "dir_nodeid %d overlap %d %d",
5370 lkb->lkb_id, mstype, r->res_nodeid,
5371 dlm_dir_nodeid(r), oc, ou);
5372 }
ef0c2bb0
DT
5373 unlock_rsb(r);
5374 put_rsb(r);
5375 dlm_put_lkb(lkb);
e7fd4179
DT
5376 }
5377
5378 return error;
5379}
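
/* Hedged sketch, not part of lock.c: how the two waiter passes bracket
 * recovery. The helper name is invented for illustration; dlm_recoverd
 * drives the real sequence. */

static int example_waiter_recovery(struct dlm_ls *ls)
{
	/* before recovery: mark in-flight ops RESEND so that stale
	   replies are ignored */
	dlm_recover_waiters_pre(ls);

	/* ... masters are rebuilt, locks recovered, requestqueue
	   processed, normal locking re-enabled ... */

	/* after recovery: resend each marked op, or turn it into an
	   unlock/cancel if an overlap op was pending (see above) */
	return dlm_recover_waiters_post(ls);
}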
5380
4875647a
DT
5381static void purge_mstcpy_list(struct dlm_ls *ls, struct dlm_rsb *r,
5382 struct list_head *list)
e7fd4179 5383{
e7fd4179
DT
5384 struct dlm_lkb *lkb, *safe;
5385
4875647a
DT
5386 list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) {
5387 if (!is_master_copy(lkb))
5388 continue;
5389
5390 /* don't purge lkbs we've added in recover_master_copy for
5391 the current recovery seq */
5392
5393 if (lkb->lkb_recover_seq == ls->ls_recover_seq)
5394 continue;
5395
5396 del_lkb(r, lkb);
5397
5398 /* this put should free the lkb */
5399 if (!dlm_put_lkb(lkb))
5400 log_error(ls, "purged mstcpy lkb not released");
e7fd4179
DT
5401 }
5402}
5403
4875647a 5404void dlm_purge_mstcpy_locks(struct dlm_rsb *r)
e7fd4179 5405{
4875647a 5406 struct dlm_ls *ls = r->res_ls;
e7fd4179 5407
4875647a
DT
5408 purge_mstcpy_list(ls, r, &r->res_grantqueue);
5409 purge_mstcpy_list(ls, r, &r->res_convertqueue);
5410 purge_mstcpy_list(ls, r, &r->res_waitqueue);
e7fd4179
DT
5411}
5412
4875647a
DT
5413static void purge_dead_list(struct dlm_ls *ls, struct dlm_rsb *r,
5414 struct list_head *list,
5415 int nodeid_gone, unsigned int *count)
e7fd4179 5416{
4875647a 5417 struct dlm_lkb *lkb, *safe;
e7fd4179 5418
4875647a
DT
5419 list_for_each_entry_safe(lkb, safe, list, lkb_statequeue) {
5420 if (!is_master_copy(lkb))
5421 continue;
5422
5423 if ((lkb->lkb_nodeid == nodeid_gone) ||
5424 dlm_is_removed(ls, lkb->lkb_nodeid)) {
5425
da8c6663
DT
5426 /* tell recover_lvb to invalidate the lvb
5427 because a node holding EX/PW failed */
5428 if ((lkb->lkb_exflags & DLM_LKF_VALBLK) &&
5429 (lkb->lkb_grmode >= DLM_LOCK_PW)) {
5430 rsb_set_flag(r, RSB_RECOVER_LVB_INVAL);
5431 }
5432
4875647a
DT
5433 del_lkb(r, lkb);
5434
5435 /* this put should free the lkb */
5436 if (!dlm_put_lkb(lkb))
5437 log_error(ls, "purged dead lkb not released");
5438
5439 rsb_set_flag(r, RSB_RECOVER_GRANT);
5440
5441 (*count)++;
5442 }
5443 }
e7fd4179
DT
5444}
5445
5446/* Get rid of locks held by nodes that are gone. */
5447
4875647a 5448void dlm_recover_purge(struct dlm_ls *ls)
e7fd4179
DT
5449{
5450 struct dlm_rsb *r;
4875647a
DT
5451 struct dlm_member *memb;
5452 int nodes_count = 0;
5453 int nodeid_gone = 0;
5454 unsigned int lkb_count = 0;
5455
5456 /* cache one removed nodeid to optimize the common
5457 case of a single node removed */
5458
5459 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
5460 nodes_count++;
5461 nodeid_gone = memb->nodeid;
5462 }
e7fd4179 5463
4875647a
DT
5464 if (!nodes_count)
5465 return;
e7fd4179
DT
5466
5467 down_write(&ls->ls_root_sem);
5468 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
5469 hold_rsb(r);
5470 lock_rsb(r);
4875647a
DT
5471 if (is_master(r)) {
5472 purge_dead_list(ls, r, &r->res_grantqueue,
5473 nodeid_gone, &lkb_count);
5474 purge_dead_list(ls, r, &r->res_convertqueue,
5475 nodeid_gone, &lkb_count);
5476 purge_dead_list(ls, r, &r->res_waitqueue,
5477 nodeid_gone, &lkb_count);
5478 }
e7fd4179
DT
5479 unlock_rsb(r);
5480 unhold_rsb(r);
4875647a 5481 cond_resched();
e7fd4179
DT
5482 }
5483 up_write(&ls->ls_root_sem);
5484
4875647a 5485 if (lkb_count)
075f0177 5486 log_rinfo(ls, "dlm_recover_purge %u locks for %u nodes",
4875647a 5487 lkb_count, nodes_count);
e7fd4179
DT
5488}
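
/* Hedged restatement, for clarity only, of the per-lkb test that
 * purge_dead_list() applies; example_lkb_is_dead() is not a helper
 * that exists in this file. */

static bool example_lkb_is_dead(struct dlm_ls *ls, struct dlm_lkb *lkb,
				int nodeid_gone)
{
	/* only master copies of locks held by other nodes are purged */
	if (!is_master_copy(lkb))
		return false;

	/* nodeid_gone is the one cached departed nodeid; the general
	   case falls back to the removed-nodes lookup */
	return (lkb->lkb_nodeid == nodeid_gone) ||
	       dlm_is_removed(ls, lkb->lkb_nodeid);
}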
5489
4875647a 5490static struct dlm_rsb *find_grant_rsb(struct dlm_ls *ls, int bucket)
97a35d1e 5491{
9beb3bf5 5492 struct rb_node *n;
4875647a 5493 struct dlm_rsb *r;
97a35d1e 5494
c7be761a 5495 spin_lock(&ls->ls_rsbtbl[bucket].lock);
9beb3bf5
BP
5496 for (n = rb_first(&ls->ls_rsbtbl[bucket].keep); n; n = rb_next(n)) {
5497 r = rb_entry(n, struct dlm_rsb, res_hashnode);
4875647a
DT
5498
5499 if (!rsb_flag(r, RSB_RECOVER_GRANT))
5500 continue;
c503a621
DT
5501 if (!is_master(r)) {
5502 rsb_clear_flag(r, RSB_RECOVER_GRANT);
97a35d1e 5503 continue;
c503a621 5504 }
97a35d1e 5505 hold_rsb(r);
4875647a
DT
5506 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
5507 return r;
97a35d1e 5508 }
c7be761a 5509 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
4875647a 5510 return NULL;
97a35d1e
DT
5511}
5512
4875647a
DT
5513/*
5514 * Attempt to grant locks on resources that we are the master of.
5515 * Locks may have become grantable during recovery because locks
5516 * from departed nodes have been purged (or not rebuilt), allowing
5517 * previously blocked locks to now be granted. The subset of rsb's
5518 * we are interested in are those with lkb's on either the convert or
5519 * waiting queues.
5520 *
5521 * Simplest would be to go through each master rsb and check for non-empty
5522 * convert or waiting queues, and attempt to grant on those rsbs.
5523 * Checking the queues requires lock_rsb, though, for which we'd need
5524 * to release the rsbtbl lock. This would make iterating through all
5525 * rsb's very inefficient. So, we rely on earlier recovery routines
5526 * to set RECOVER_GRANT on any rsb's that we should attempt to grant
5527 * locks for.
5528 */
5529
5530void dlm_recover_grant(struct dlm_ls *ls)
e7fd4179
DT
5531{
5532 struct dlm_rsb *r;
2b4e926a 5533 int bucket = 0;
4875647a
DT
5534 unsigned int count = 0;
5535 unsigned int rsb_count = 0;
5536 unsigned int lkb_count = 0;
e7fd4179 5537
2b4e926a 5538 while (1) {
4875647a 5539 r = find_grant_rsb(ls, bucket);
2b4e926a
DT
5540 if (!r) {
5541 if (bucket == ls->ls_rsbtbl_size - 1)
5542 break;
5543 bucket++;
97a35d1e 5544 continue;
2b4e926a 5545 }
4875647a
DT
5546 rsb_count++;
5547 count = 0;
97a35d1e 5548 lock_rsb(r);
c503a621 5549 /* the RECOVER_GRANT flag is checked in the grant path */
4875647a 5550 grant_pending_locks(r, &count);
c503a621 5551 rsb_clear_flag(r, RSB_RECOVER_GRANT);
4875647a
DT
5552 lkb_count += count;
5553 confirm_master(r, 0);
97a35d1e
DT
5554 unlock_rsb(r);
5555 put_rsb(r);
4875647a 5556 cond_resched();
e7fd4179 5557 }
4875647a
DT
5558
5559 if (lkb_count)
075f0177 5560 log_rinfo(ls, "dlm_recover_grant %u locks on %u resources",
4875647a 5561 lkb_count, rsb_count);
e7fd4179
DT
5562}
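
/* Hedged summary of the RSB_RECOVER_GRANT life cycle pieced together
 * from the routines above:
 *
 *   purge_dead_list()          sets it on a master rsb that lost locks
 *   dlm_recover_master_copy()  sets it when rebuilt queues are non-empty
 *   find_grant_rsb()           returns only rsbs that have it set
 *   dlm_recover_grant()        grants pending locks and clears it
 */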
5563
5564static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid,
5565 uint32_t remid)
5566{
5567 struct dlm_lkb *lkb;
5568
5569 list_for_each_entry(lkb, head, lkb_statequeue) {
5570 if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid)
5571 return lkb;
5572 }
5573 return NULL;
5574}
5575
5576static struct dlm_lkb *search_remid(struct dlm_rsb *r, int nodeid,
5577 uint32_t remid)
5578{
5579 struct dlm_lkb *lkb;
5580
5581 lkb = search_remid_list(&r->res_grantqueue, nodeid, remid);
5582 if (lkb)
5583 return lkb;
5584 lkb = search_remid_list(&r->res_convertqueue, nodeid, remid);
5585 if (lkb)
5586 return lkb;
5587 lkb = search_remid_list(&r->res_waitqueue, nodeid, remid);
5588 if (lkb)
5589 return lkb;
5590 return NULL;
5591}
5592
ae773d0b 5593/* needs at least dlm_rcom + rcom_lock */
e7fd4179
DT
5594static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
5595 struct dlm_rsb *r, struct dlm_rcom *rc)
5596{
5597 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
e7fd4179 5598
3428785a 5599 lkb->lkb_nodeid = le32_to_cpu(rc->rc_header.h_nodeid);
163a1859
AV
5600 lkb->lkb_ownpid = le32_to_cpu(rl->rl_ownpid);
5601 lkb->lkb_remid = le32_to_cpu(rl->rl_lkid);
5602 lkb->lkb_exflags = le32_to_cpu(rl->rl_exflags);
5603 lkb->lkb_flags = le32_to_cpu(rl->rl_flags) & 0x0000FFFF;
e7fd4179 5604 lkb->lkb_flags |= DLM_IFL_MSTCPY;
163a1859 5605 lkb->lkb_lvbseq = le32_to_cpu(rl->rl_lvbseq);
e7fd4179
DT
5606 lkb->lkb_rqmode = rl->rl_rqmode;
5607 lkb->lkb_grmode = rl->rl_grmode;
5608 /* don't set lkb_status because add_lkb wants to set it itself */
5609
8304d6f2
DT
5610 lkb->lkb_bastfn = (rl->rl_asts & DLM_CB_BAST) ? &fake_bastfn : NULL;
5611 lkb->lkb_astfn = (rl->rl_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
e7fd4179 5612
e7fd4179 5613 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3428785a
AA
5614 int lvblen = le16_to_cpu(rc->rc_header.h_length) -
5615 sizeof(struct dlm_rcom) - sizeof(struct rcom_lock);
a5dd0631
AV
5616 if (lvblen > ls->ls_lvblen)
5617 return -EINVAL;
52bda2b5 5618 lkb->lkb_lvbptr = dlm_allocate_lvb(ls);
e7fd4179
DT
5619 if (!lkb->lkb_lvbptr)
5620 return -ENOMEM;
e7fd4179
DT
5621 memcpy(lkb->lkb_lvbptr, rl->rl_lvb, lvblen);
5622 }
5623
5624 /* Conversions between PR and CW (middle modes) need special handling.
5625 The real granted mode of these converting locks cannot be determined
5626 until all locks have been rebuilt on the rsb (recover_conversion) */
5627
163a1859
AV
5628 if (rl->rl_wait_type == cpu_to_le16(DLM_MSG_CONVERT) &&
5629 middle_conversion(lkb)) {
e7fd4179
DT
5630 rl->rl_status = DLM_LKSTS_CONVERT;
5631 lkb->lkb_grmode = DLM_LOCK_IV;
5632 rsb_set_flag(r, RSB_RECOVER_CONVERT);
5633 }
5634
5635 return 0;
5636}
5637
5638/* This lkb may have been recovered in a previous aborted recovery so we need
5639 to check if the rsb already has an lkb with the given remote nodeid/lkid.
5640 If so we just send back a standard reply. If not, we create a new lkb with
5641 the given values and send back our lkid. We send back our lkid by sending
5642 back the rcom_lock struct we got but with the remid field filled in. */
5643
ae773d0b 5644/* needs at least dlm_rcom + rcom_lock */
e7fd4179
DT
5645int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
5646{
5647 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
5648 struct dlm_rsb *r;
5649 struct dlm_lkb *lkb;
6d40c4a7 5650 uint32_t remid = 0;
3428785a 5651 int from_nodeid = le32_to_cpu(rc->rc_header.h_nodeid);
e7fd4179
DT
5652 int error;
5653
5654 if (rl->rl_parent_lkid) {
5655 error = -EOPNOTSUPP;
5656 goto out;
5657 }
5658
6d40c4a7
DT
5659 remid = le32_to_cpu(rl->rl_lkid);
5660
4875647a
DT
5661 /* In general we expect the rsb returned to be R_MASTER, but we don't
5662 have to require it. Recovery of masters on one node can overlap
5663 recovery of locks on another node, so one node can send us MSTCPY
5664 locks before we've made ourselves master of this rsb. We can still
5665 add new MSTCPY locks that we receive here without any harm; when
5666 we make ourselves master, dlm_recover_masters() won't touch the
5667 MSTCPY locks we've received early. */
5668
c04fecb4
DT
5669 error = find_rsb(ls, rl->rl_name, le16_to_cpu(rl->rl_namelen),
5670 from_nodeid, R_RECEIVE_RECOVER, &r);
e7fd4179
DT
5671 if (error)
5672 goto out;
5673
c04fecb4
DT
5674 lock_rsb(r);
5675
4875647a
DT
5676 if (dlm_no_directory(ls) && (dlm_dir_nodeid(r) != dlm_our_nodeid())) {
5677 log_error(ls, "dlm_recover_master_copy remote %d %x not dir",
c04fecb4 5678 from_nodeid, remid);
4875647a 5679 error = -EBADR;
c04fecb4 5680 goto out_unlock;
4875647a
DT
5681 }
5682
c04fecb4 5683 lkb = search_remid(r, from_nodeid, remid);
e7fd4179
DT
5684 if (lkb) {
5685 error = -EEXIST;
5686 goto out_remid;
5687 }
5688
5689 error = create_lkb(ls, &lkb);
5690 if (error)
5691 goto out_unlock;
5692
5693 error = receive_rcom_lock_args(ls, lkb, r, rc);
5694 if (error) {
b3f58d8f 5695 __put_lkb(ls, lkb);
e7fd4179
DT
5696 goto out_unlock;
5697 }
5698
5699 attach_lkb(r, lkb);
5700 add_lkb(r, lkb, rl->rl_status);
4875647a
DT
5701 ls->ls_recover_locks_in++;
5702
5703 if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
5704 rsb_set_flag(r, RSB_RECOVER_GRANT);
e7fd4179
DT
5705
5706 out_remid:
5707 /* this is the new value returned to the lock holder for
5708 saving in its process-copy lkb */
163a1859 5709 rl->rl_remid = cpu_to_le32(lkb->lkb_id);
e7fd4179 5710
4875647a
DT
5711 lkb->lkb_recover_seq = ls->ls_recover_seq;
5712
e7fd4179
DT
5713 out_unlock:
5714 unlock_rsb(r);
5715 put_rsb(r);
5716 out:
6d40c4a7 5717 if (error && error != -EEXIST)
075f0177 5718 log_rinfo(ls, "dlm_recover_master_copy remote %d %x error %d",
c04fecb4 5719 from_nodeid, remid, error);
163a1859 5720 rl->rl_result = cpu_to_le32(error);
e7fd4179
DT
5721 return error;
5722}
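
/* Hedged summary of the lkid/remid exchange described above: the lock
 * holder sends rl_lkid (its own lkb_id), which receive_rcom_lock_args()
 * stores as the MSTCPY lkb_remid; the new master answers by filling
 * rl_remid with its own lkb_id, which dlm_recover_process_copy() then
 * saves in the process-copy lkb_remid. */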
5723
ae773d0b 5724/* needs at least dlm_rcom + rcom_lock */
e7fd4179
DT
5725int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
5726{
5727 struct rcom_lock *rl = (struct rcom_lock *) rc->rc_buf;
5728 struct dlm_rsb *r;
5729 struct dlm_lkb *lkb;
6d40c4a7
DT
5730 uint32_t lkid, remid;
5731 int error, result;
5732
5733 lkid = le32_to_cpu(rl->rl_lkid);
5734 remid = le32_to_cpu(rl->rl_remid);
5735 result = le32_to_cpu(rl->rl_result);
e7fd4179 5736
6d40c4a7 5737 error = find_lkb(ls, lkid, &lkb);
e7fd4179 5738 if (error) {
6d40c4a7 5739 log_error(ls, "dlm_recover_process_copy no %x remote %d %x %d",
3428785a
AA
5740 lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
5741 result);
e7fd4179
DT
5742 return error;
5743 }
5744
4875647a
DT
5745 r = lkb->lkb_resource;
5746 hold_rsb(r);
5747 lock_rsb(r);
5748
6d40c4a7
DT
5749 if (!is_process_copy(lkb)) {
5750 log_error(ls, "dlm_recover_process_copy bad %x remote %d %x %d",
3428785a
AA
5751 lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
5752 result);
4875647a
DT
5753 dlm_dump_rsb(r);
5754 unlock_rsb(r);
5755 put_rsb(r);
5756 dlm_put_lkb(lkb);
6d40c4a7
DT
5757 return -EINVAL;
5758 }
e7fd4179 5759
6d40c4a7 5760 switch (result) {
dc200a88
DT
5761 case -EBADR:
5762 /* There's a chance the new master received our lock before
5763 dlm_recover_master_reply(); this wouldn't happen if we did
5764 a barrier between recover_masters and recover_locks. */
6d40c4a7
DT
5765
5766 log_debug(ls, "dlm_recover_process_copy %x remote %d %x %d",
3428785a
AA
5767 lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
5768 result);
6d40c4a7 5769
dc200a88
DT
5770 dlm_send_rcom_lock(r, lkb);
5771 goto out;
e7fd4179 5772 case -EEXIST:
e7fd4179 5773 case 0:
6d40c4a7 5774 lkb->lkb_remid = remid;
e7fd4179
DT
5775 break;
5776 default:
6d40c4a7 5777 log_error(ls, "dlm_recover_process_copy %x remote %d %x %d unk",
3428785a
AA
5778 lkid, le32_to_cpu(rc->rc_header.h_nodeid), remid,
5779 result);
e7fd4179
DT
5780 }
5781
5782 /* an ack for dlm_recover_locks() which waits for replies from
5783 all the locks it sends to new masters */
5784 dlm_recovered_lock(r);
dc200a88 5785 out:
e7fd4179
DT
5786 unlock_rsb(r);
5787 put_rsb(r);
b3f58d8f 5788 dlm_put_lkb(lkb);
e7fd4179
DT
5789
5790 return 0;
5791}
5792
6b0afc0c 5793#ifdef CONFIG_DLM_DEPRECATED_API
597d0cae
DT
5794int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
5795 int mode, uint32_t flags, void *name, unsigned int namelen,
d7db923e 5796 unsigned long timeout_cs)
6b0afc0c
AA
5797#else
5798int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
5799 int mode, uint32_t flags, void *name, unsigned int namelen)
5800#endif
597d0cae
DT
5801{
5802 struct dlm_lkb *lkb;
5803 struct dlm_args args;
5804 int error;
5805
85e86edf 5806 dlm_lock_recovery(ls);
597d0cae
DT
5807
5808 error = create_lkb(ls, &lkb);
5809 if (error) {
5810 kfree(ua);
5811 goto out;
5812 }
5813
5814 if (flags & DLM_LKF_VALBLK) {
573c24c4 5815 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
597d0cae
DT
5816 if (!ua->lksb.sb_lvbptr) {
5817 kfree(ua);
5818 __put_lkb(ls, lkb);
5819 error = -ENOMEM;
5820 goto out;
5821 }
5822 }
6b0afc0c 5823#ifdef CONFIG_DLM_DEPRECATED_API
d7db923e 5824 error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
e5dae548 5825 fake_astfn, ua, fake_bastfn, &args);
6b0afc0c
AA
5826#else
5827 error = set_lock_args(mode, &ua->lksb, flags, namelen, fake_astfn, ua,
5828 fake_bastfn, &args);
5829#endif
597d0cae 5830 if (error) {
d47b41ac
VA
5831 kfree(ua->lksb.sb_lvbptr);
5832 ua->lksb.sb_lvbptr = NULL;
5833 kfree(ua);
597d0cae
DT
5834 __put_lkb(ls, lkb);
5835 goto out;
5836 }
5837
d47b41ac
VA
5838 /* After ua is attached to lkb it will be freed by dlm_free_lkb().
5839 When DLM_IFL_USER is set, the dlm knows that this is a userspace
5840 lock and that lkb_astparam is the dlm_user_args structure. */
5841 lkb->lkb_flags |= DLM_IFL_USER;
597d0cae
DT
5842 error = request_lock(ls, lkb, name, namelen, &args);
5843
5844 switch (error) {
5845 case 0:
5846 break;
5847 case -EINPROGRESS:
5848 error = 0;
5849 break;
5850 case -EAGAIN:
5851 error = 0;
df561f66 5852 fallthrough;
597d0cae
DT
5853 default:
5854 __put_lkb(ls, lkb);
5855 goto out;
5856 }
5857
5858 /* add this new lkb to the per-process list of locks */
5859 spin_lock(&ua->proc->locks_spin);
ef0c2bb0 5860 hold_lkb(lkb);
597d0cae
DT
5861 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
5862 spin_unlock(&ua->proc->locks_spin);
5863 out:
85e86edf 5864 dlm_unlock_recovery(ls);
597d0cae
DT
5865 return error;
5866}
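
/* Hedged sketch, not part of lock.c: the in-kernel dlm_lock() entry
 * point that dlm_user_request() parallels for userspace; the wrapper
 * name, resource name and callbacks here are illustrative only. */

static int example_kernel_request(dlm_lockspace_t *lockspace,
				  struct dlm_lksb *lksb,
				  void (*ast)(void *astarg), void *astarg)
{
	/* same stage-1 request path as above, without the dlm_user_args
	   setup or the per-process lock list */
	return dlm_lock(lockspace, DLM_LOCK_EX, lksb, 0, "example_res", 11,
			0, ast, astarg, NULL);
}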
5867
6b0afc0c 5868#ifdef CONFIG_DLM_DEPRECATED_API
597d0cae 5869int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
d7db923e
DT
5870 int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
5871 unsigned long timeout_cs)
6b0afc0c
AA
5872#else
5873int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
5874 int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
5875#endif
597d0cae
DT
5876{
5877 struct dlm_lkb *lkb;
5878 struct dlm_args args;
5879 struct dlm_user_args *ua;
5880 int error;
5881
85e86edf 5882 dlm_lock_recovery(ls);
597d0cae
DT
5883
5884 error = find_lkb(ls, lkid, &lkb);
5885 if (error)
5886 goto out;
5887
5888 /* user can change the params on its lock when it converts it, or
5889 add an lvb that didn't exist before */
5890
d292c0cc 5891 ua = lkb->lkb_ua;
597d0cae
DT
5892
5893 if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
573c24c4 5894 ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
597d0cae
DT
5895 if (!ua->lksb.sb_lvbptr) {
5896 error = -ENOMEM;
5897 goto out_put;
5898 }
5899 }
5900 if (lvb_in && ua->lksb.sb_lvbptr)
5901 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
5902
d7db923e 5903 ua->xid = ua_tmp->xid;
597d0cae
DT
5904 ua->castparam = ua_tmp->castparam;
5905 ua->castaddr = ua_tmp->castaddr;
5906 ua->bastparam = ua_tmp->bastparam;
5907 ua->bastaddr = ua_tmp->bastaddr;
10948eb4 5908 ua->user_lksb = ua_tmp->user_lksb;
597d0cae 5909
6b0afc0c 5910#ifdef CONFIG_DLM_DEPRECATED_API
d7db923e 5911 error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
e5dae548 5912 fake_astfn, ua, fake_bastfn, &args);
6b0afc0c
AA
5913#else
5914 error = set_lock_args(mode, &ua->lksb, flags, 0, fake_astfn, ua,
5915 fake_bastfn, &args);
5916#endif
597d0cae
DT
5917 if (error)
5918 goto out_put;
5919
5920 error = convert_lock(ls, lkb, &args);
5921
c85d65e9 5922 if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
597d0cae
DT
5923 error = 0;
5924 out_put:
5925 dlm_put_lkb(lkb);
5926 out:
85e86edf 5927 dlm_unlock_recovery(ls);
597d0cae
DT
5928 kfree(ua_tmp);
5929 return error;
5930}
5931
2ab4bd8e
DT
5932/*
5933 * The caller asks for an orphan lock on a given resource with a given mode.
5934 * If a matching lock exists, it's moved to the owner's list of locks and
5935 * the lkid is returned.
5936 */
5937
5938int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
5939 int mode, uint32_t flags, void *name, unsigned int namelen,
8d614a44 5940 uint32_t *lkid)
2ab4bd8e 5941{
dc1acd5c 5942 struct dlm_lkb *lkb = NULL, *iter;
2ab4bd8e
DT
5943 struct dlm_user_args *ua;
5944 int found_other_mode = 0;
2ab4bd8e
DT
5945 int rv = 0;
5946
5947 mutex_lock(&ls->ls_orphans_mutex);
dc1acd5c
JK
5948 list_for_each_entry(iter, &ls->ls_orphans, lkb_ownqueue) {
5949 if (iter->lkb_resource->res_length != namelen)
2ab4bd8e 5950 continue;
dc1acd5c 5951 if (memcmp(iter->lkb_resource->res_name, name, namelen))
2ab4bd8e 5952 continue;
dc1acd5c 5953 if (iter->lkb_grmode != mode) {
2ab4bd8e
DT
5954 found_other_mode = 1;
5955 continue;
5956 }
5957
dc1acd5c
JK
5958 lkb = iter;
5959 list_del_init(&iter->lkb_ownqueue);
5960 iter->lkb_flags &= ~DLM_IFL_ORPHAN;
5961 *lkid = iter->lkb_id;
2ab4bd8e
DT
5962 break;
5963 }
5964 mutex_unlock(&ls->ls_orphans_mutex);
5965
dc1acd5c 5966 if (!lkb && found_other_mode) {
2ab4bd8e
DT
5967 rv = -EAGAIN;
5968 goto out;
5969 }
5970
dc1acd5c 5971 if (!lkb) {
2ab4bd8e
DT
5972 rv = -ENOENT;
5973 goto out;
5974 }
5975
5976 lkb->lkb_exflags = flags;
5977 lkb->lkb_ownpid = (int) current->pid;
5978
5979 ua = lkb->lkb_ua;
5980
5981 ua->proc = ua_tmp->proc;
5982 ua->xid = ua_tmp->xid;
5983 ua->castparam = ua_tmp->castparam;
5984 ua->castaddr = ua_tmp->castaddr;
5985 ua->bastparam = ua_tmp->bastparam;
5986 ua->bastaddr = ua_tmp->bastaddr;
5987 ua->user_lksb = ua_tmp->user_lksb;
5988
5989 /*
5990 * The lkb reference from the ls_orphans list was not
5991 * removed above, and is now considered the reference
5992 * for the proc locks list.
5993 */
5994
5995 spin_lock(&ua->proc->locks_spin);
5996 list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
5997 spin_unlock(&ua->proc->locks_spin);
5998 out:
5999 kfree(ua_tmp);
6000 return rv;
6001}
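
/* Hedged summary of the adopt-orphan results above: 0 with *lkid set
 * when an orphan matches both name and mode, -EAGAIN when orphans of
 * the name exist only with other modes, -ENOENT when nothing matches
 * the name at all. */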
6002
597d0cae
DT
6003int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
6004 uint32_t flags, uint32_t lkid, char *lvb_in)
6005{
6006 struct dlm_lkb *lkb;
6007 struct dlm_args args;
6008 struct dlm_user_args *ua;
6009 int error;
6010
85e86edf 6011 dlm_lock_recovery(ls);
597d0cae
DT
6012
6013 error = find_lkb(ls, lkid, &lkb);
6014 if (error)
6015 goto out;
6016
d292c0cc 6017 ua = lkb->lkb_ua;
597d0cae
DT
6018
6019 if (lvb_in && ua->lksb.sb_lvbptr)
6020 memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
b434eda6
PC
6021 if (ua_tmp->castparam)
6022 ua->castparam = ua_tmp->castparam;
cc346d55 6023 ua->user_lksb = ua_tmp->user_lksb;
597d0cae
DT
6024
6025 error = set_unlock_args(flags, ua, &args);
6026 if (error)
6027 goto out_put;
6028
6029 error = unlock_lock(ls, lkb, &args);
6030
6031 if (error == -DLM_EUNLOCK)
6032 error = 0;
ef0c2bb0
DT
6033 /* from validate_unlock_args() */
6034 if (error == -EBUSY && (flags & DLM_LKF_FORCEUNLOCK))
6035 error = 0;
597d0cae
DT
6036 if (error)
6037 goto out_put;
6038
6039 spin_lock(&ua->proc->locks_spin);
23e8e1aa 6040 /* dlm_user_add_cb() may have already taken lkb off the proc list */
a1bc86e6
DT
6041 if (!list_empty(&lkb->lkb_ownqueue))
6042 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
597d0cae 6043 spin_unlock(&ua->proc->locks_spin);
597d0cae
DT
6044 out_put:
6045 dlm_put_lkb(lkb);
6046 out:
85e86edf 6047 dlm_unlock_recovery(ls);
ef0c2bb0 6048 kfree(ua_tmp);
597d0cae
DT
6049 return error;
6050}
6051
6052int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
6053 uint32_t flags, uint32_t lkid)
6054{
6055 struct dlm_lkb *lkb;
6056 struct dlm_args args;
6057 struct dlm_user_args *ua;
6058 int error;
6059
85e86edf 6060 dlm_lock_recovery(ls);
597d0cae
DT
6061
6062 error = find_lkb(ls, lkid, &lkb);
6063 if (error)
6064 goto out;
6065
d292c0cc 6066 ua = lkb->lkb_ua;
b434eda6
PC
6067 if (ua_tmp->castparam)
6068 ua->castparam = ua_tmp->castparam;
c059f70e 6069 ua->user_lksb = ua_tmp->user_lksb;
597d0cae
DT
6070
6071 error = set_unlock_args(flags, ua, &args);
6072 if (error)
6073 goto out_put;
6074
6075 error = cancel_lock(ls, lkb, &args);
6076
6077 if (error == -DLM_ECANCEL)
6078 error = 0;
ef0c2bb0
DT
6079 /* from validate_unlock_args() */
6080 if (error == -EBUSY)
6081 error = 0;
597d0cae
DT
6082 out_put:
6083 dlm_put_lkb(lkb);
6084 out:
85e86edf 6085 dlm_unlock_recovery(ls);
ef0c2bb0 6086 kfree(ua_tmp);
597d0cae
DT
6087 return error;
6088}
6089
8b4021fa
DT
6090int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
6091{
6092 struct dlm_lkb *lkb;
6093 struct dlm_args args;
6094 struct dlm_user_args *ua;
6095 struct dlm_rsb *r;
6096 int error;
6097
6098 dlm_lock_recovery(ls);
6099
6100 error = find_lkb(ls, lkid, &lkb);
6101 if (error)
6102 goto out;
6103
d292c0cc 6104 ua = lkb->lkb_ua;
8b4021fa
DT
6105
6106 error = set_unlock_args(flags, ua, &args);
6107 if (error)
6108 goto out_put;
6109
6110 /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
6111
6112 r = lkb->lkb_resource;
6113 hold_rsb(r);
6114 lock_rsb(r);
6115
6116 error = validate_unlock_args(lkb, &args);
6117 if (error)
6118 goto out_r;
6119 lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
6120
6121 error = _cancel_lock(r, lkb);
6122 out_r:
6123 unlock_rsb(r);
6124 put_rsb(r);
6125
6126 if (error == -DLM_ECANCEL)
6127 error = 0;
6128 /* from validate_unlock_args() */
6129 if (error == -EBUSY)
6130 error = 0;
6131 out_put:
6132 dlm_put_lkb(lkb);
6133 out:
6134 dlm_unlock_recovery(ls);
6135 return error;
6136}
6137
ef0c2bb0
DT
6138/* lkb's that are removed from the waiters list by revert are just left on the
6139 orphans list with the granted orphan locks, to be freed by purge */
6140
597d0cae
DT
6141static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
6142{
ef0c2bb0
DT
6143 struct dlm_args args;
6144 int error;
597d0cae 6145
2ab4bd8e 6146 hold_lkb(lkb); /* reference for the ls_orphans list */
ef0c2bb0
DT
6147 mutex_lock(&ls->ls_orphans_mutex);
6148 list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans);
6149 mutex_unlock(&ls->ls_orphans_mutex);
597d0cae 6150
d292c0cc 6151 set_unlock_args(0, lkb->lkb_ua, &args);
ef0c2bb0
DT
6152
6153 error = cancel_lock(ls, lkb, &args);
6154 if (error == -DLM_ECANCEL)
6155 error = 0;
6156 return error;
597d0cae
DT
6157}
6158
da8c6663
DT
6159/* The FORCEUNLOCK flag allows the unlock to go ahead even if the lkb isn't
6160 granted. Regardless of what rsb queue the lock is on, it's removed and
6161 freed. The IVVALBLK flag causes the lvb on the resource to be invalidated
6162 if our lock is PW/EX (it's ignored if our granted mode is smaller). */
597d0cae
DT
6163
6164static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
6165{
597d0cae
DT
6166 struct dlm_args args;
6167 int error;
6168
da8c6663
DT
6169 set_unlock_args(DLM_LKF_FORCEUNLOCK | DLM_LKF_IVVALBLK,
6170 lkb->lkb_ua, &args);
597d0cae
DT
6171
6172 error = unlock_lock(ls, lkb, &args);
6173 if (error == -DLM_EUNLOCK)
6174 error = 0;
6175 return error;
6176}
6177
ef0c2bb0
DT
6178/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
6179 (which does lock_rsb) to avoid deadlock with a received message that does
23e8e1aa 6180 lock_rsb followed by dlm_user_add_cb() */
ef0c2bb0
DT
6181
6182static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
6183 struct dlm_user_proc *proc)
6184{
6185 struct dlm_lkb *lkb = NULL;
6186
6187 mutex_lock(&ls->ls_clear_proc_locks);
6188 if (list_empty(&proc->locks))
6189 goto out;
6190
6191 lkb = list_entry(proc->locks.next, struct dlm_lkb, lkb_ownqueue);
6192 list_del_init(&lkb->lkb_ownqueue);
6193
6194 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
6195 lkb->lkb_flags |= DLM_IFL_ORPHAN;
6196 else
6197 lkb->lkb_flags |= DLM_IFL_DEAD;
6198 out:
6199 mutex_unlock(&ls->ls_clear_proc_locks);
6200 return lkb;
6201}
6202
23e8e1aa 6203/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
597d0cae
DT
6204 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
6205 which we clear here. */
6206
6207/* proc CLOSING flag is set so no more device_reads should look at proc->asts
6208 list, and no more device_writes should add lkb's to proc->locks list; so we
6209 shouldn't need to take asts_spin or locks_spin here. this assumes that
6210 device reads/writes/closes are serialized -- FIXME: we may need to serialize
6211 them ourselves. */
6212
6213void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
6214{
6215 struct dlm_lkb *lkb, *safe;
6216
85e86edf 6217 dlm_lock_recovery(ls);
597d0cae 6218
ef0c2bb0
DT
6219 while (1) {
6220 lkb = del_proc_lock(ls, proc);
6221 if (!lkb)
6222 break;
84d8cd69 6223 del_timeout(lkb);
ef0c2bb0 6224 if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
597d0cae 6225 orphan_proc_lock(ls, lkb);
ef0c2bb0 6226 else
597d0cae 6227 unlock_proc_lock(ls, lkb);
597d0cae
DT
6228
6229 /* this removes the reference for the proc->locks list
6230 added by dlm_user_request, it may result in the lkb
6231 being freed */
6232
6233 dlm_put_lkb(lkb);
6234 }
a1bc86e6 6235
ef0c2bb0
DT
6236 mutex_lock(&ls->ls_clear_proc_locks);
6237
a1bc86e6
DT
6238 /* in-progress unlocks */
6239 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
6240 list_del_init(&lkb->lkb_ownqueue);
6241 lkb->lkb_flags |= DLM_IFL_DEAD;
6242 dlm_put_lkb(lkb);
6243 }
6244
23e8e1aa 6245 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
8304d6f2
DT
6246 memset(&lkb->lkb_callbacks, 0,
6247 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
23e8e1aa 6248 list_del_init(&lkb->lkb_cb_list);
a1bc86e6
DT
6249 dlm_put_lkb(lkb);
6250 }
6251
597d0cae 6252 mutex_unlock(&ls->ls_clear_proc_locks);
85e86edf 6253 dlm_unlock_recovery(ls);
597d0cae 6254}
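
/* Hedged summary: dlm_clear_proc_locks() drops three classes of
 * per-proc lkb references -- active locks (orphaned if PERSISTENT,
 * force-unlocked otherwise), in-progress unlocks, and lkbs still queued
 * for callbacks -- taking ls_clear_proc_locks around the list handling
 * but never across unlock_proc_lock(), per the deadlock note above. */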
a1bc86e6 6255
8499137d
DT
6256static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
6257{
6258 struct dlm_lkb *lkb, *safe;
6259
6260 while (1) {
6261 lkb = NULL;
6262 spin_lock(&proc->locks_spin);
6263 if (!list_empty(&proc->locks)) {
6264 lkb = list_entry(proc->locks.next, struct dlm_lkb,
6265 lkb_ownqueue);
6266 list_del_init(&lkb->lkb_ownqueue);
6267 }
6268 spin_unlock(&proc->locks_spin);
6269
6270 if (!lkb)
6271 break;
6272
6273 lkb->lkb_flags |= DLM_IFL_DEAD;
6274 unlock_proc_lock(ls, lkb);
6275 dlm_put_lkb(lkb); /* ref from proc->locks list */
6276 }
6277
6278 spin_lock(&proc->locks_spin);
6279 list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
6280 list_del_init(&lkb->lkb_ownqueue);
6281 lkb->lkb_flags |= DLM_IFL_DEAD;
6282 dlm_put_lkb(lkb);
6283 }
6284 spin_unlock(&proc->locks_spin);
6285
6286 spin_lock(&proc->asts_spin);
23e8e1aa 6287 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
8304d6f2
DT
6288 memset(&lkb->lkb_callbacks, 0,
6289 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
23e8e1aa 6290 list_del_init(&lkb->lkb_cb_list);
8499137d
DT
6291 dlm_put_lkb(lkb);
6292 }
6293 spin_unlock(&proc->asts_spin);
6294}
6295
6296/* pid of 0 means purge all orphans */
6297
6298static void do_purge(struct dlm_ls *ls, int nodeid, int pid)
6299{
6300 struct dlm_lkb *lkb, *safe;
6301
6302 mutex_lock(&ls->ls_orphans_mutex);
6303 list_for_each_entry_safe(lkb, safe, &ls->ls_orphans, lkb_ownqueue) {
6304 if (pid && lkb->lkb_ownpid != pid)
6305 continue;
6306 unlock_proc_lock(ls, lkb);
6307 list_del_init(&lkb->lkb_ownqueue);
6308 dlm_put_lkb(lkb);
6309 }
6310 mutex_unlock(&ls->ls_orphans_mutex);
6311}
6312
6313static int send_purge(struct dlm_ls *ls, int nodeid, int pid)
6314{
6315 struct dlm_message *ms;
6316 struct dlm_mhandle *mh;
6317 int error;
6318
6319 error = _create_message(ls, sizeof(struct dlm_message), nodeid,
6320 DLM_MSG_PURGE, &ms, &mh);
6321 if (error)
6322 return error;
00e99ccd
AA
6323 ms->m_nodeid = cpu_to_le32(nodeid);
6324 ms->m_pid = cpu_to_le32(pid);
8499137d
DT
6325
6326 return send_message(mh, ms);
6327}
6328
6329int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
6330 int nodeid, int pid)
6331{
6332 int error = 0;
6333
2ab4bd8e 6334 if (nodeid && (nodeid != dlm_our_nodeid())) {
8499137d
DT
6335 error = send_purge(ls, nodeid, pid);
6336 } else {
85e86edf 6337 dlm_lock_recovery(ls);
8499137d
DT
6338 if (pid == current->pid)
6339 purge_proc_locks(ls, proc);
6340 else
6341 do_purge(ls, nodeid, pid);
85e86edf 6342 dlm_unlock_recovery(ls);
8499137d
DT
6343 }
6344 return error;
6345}
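
/* Hedged summary of the purge fan-out above: a non-local nodeid turns
 * into a DLM_MSG_PURGE message handled by do_purge() on that node;
 * locally, the caller's own pid goes through purge_proc_locks() and any
 * other pid through do_purge(). */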
6346
5054e79d
AA
6347/* debug functionality */
6348int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
6349 int lkb_nodeid, unsigned int lkb_flags, int lkb_status)
6350{
6351 struct dlm_lksb *lksb;
6352 struct dlm_lkb *lkb;
6353 struct dlm_rsb *r;
6354 int error;
6355
6356 /* we currently can't set a valid user lock */
6357 if (lkb_flags & DLM_IFL_USER)
6358 return -EOPNOTSUPP;
6359
6360 lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
6361 if (!lksb)
6362 return -ENOMEM;
6363
6364 error = _create_lkb(ls, &lkb, lkb_id, lkb_id + 1);
6365 if (error) {
6366 kfree(lksb);
6367 return error;
6368 }
6369
6370 lkb->lkb_flags = lkb_flags;
6371 lkb->lkb_nodeid = lkb_nodeid;
6372 lkb->lkb_lksb = lksb;
6373 /* user-specific pointer; just don't have it NULL for kernel locks */
6374 if (~lkb_flags & DLM_IFL_USER)
6375 lkb->lkb_astparam = (void *)0xDEADBEEF;
6376
6377 error = find_rsb(ls, name, len, 0, R_REQUEST, &r);
6378 if (error) {
6379 kfree(lksb);
6380 __put_lkb(ls, lkb);
6381 return error;
6382 }
6383
6384 lock_rsb(r);
6385 attach_lkb(r, lkb);
6386 add_lkb(r, lkb, lkb_status);
6387 unlock_rsb(r);
6388 put_rsb(r);
6389
6390 return 0;
6391}
6392
63eab2b0
AA
6393int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id,
6394 int mstype, int to_nodeid)
6395{
6396 struct dlm_lkb *lkb;
6397 int error;
6398
6399 error = find_lkb(ls, lkb_id, &lkb);
6400 if (error)
6401 return error;
6402
6403 error = add_to_waiters(lkb, mstype, to_nodeid);
6404 dlm_put_lkb(lkb);
6405 return error;
6406}
6407