Commit | Line | Data |
---|---|---|
328970de | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
fa60ce2c | 2 | /* |
6714d8e8 KH |
3 | * dlmthread.c |
4 | * | |
5 | * standalone DLM module | |
6 | * | |
7 | * Copyright (C) 2004 Oracle. All rights reserved. | |
6714d8e8 KH |
8 | */ |
9 | ||
10 | ||
11 | #include <linux/module.h> | |
12 | #include <linux/fs.h> | |
13 | #include <linux/types.h> | |
6714d8e8 | 14 | #include <linux/highmem.h> |
6714d8e8 KH |
15 | #include <linux/init.h> |
16 | #include <linux/sysctl.h> | |
17 | #include <linux/random.h> | |
18 | #include <linux/blkdev.h> | |
19 | #include <linux/socket.h> | |
20 | #include <linux/inet.h> | |
21 | #include <linux/timer.h> | |
22 | #include <linux/kthread.h> | |
8d79d088 | 23 | #include <linux/delay.h> |
6714d8e8 KH |
24 | |
25 | ||
ca322fb6 MY |
26 | #include "../cluster/heartbeat.h" |
27 | #include "../cluster/nodemanager.h" | |
28 | #include "../cluster/tcp.h" | |
6714d8e8 KH |
29 | |
30 | #include "dlmapi.h" | |
31 | #include "dlmcommon.h" | |
32 | #include "dlmdomain.h" | |
33 | ||
34 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD) | |
ca322fb6 | 35 | #include "../cluster/masklog.h" |
6714d8e8 | 36 | |
6714d8e8 | 37 | static int dlm_thread(void *data); |
6714d8e8 KH |
38 | static void dlm_flush_asts(struct dlm_ctxt *dlm); |
39 | ||
6714d8e8 KH |
40 | /* will exit holding res->spinlock, but may drop in function */ |
41 | /* waits until flags are cleared on res->state */ | |
42 | void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags) | |
43 | { | |
44 | DECLARE_WAITQUEUE(wait, current); | |
45 | ||
46 | assert_spin_locked(&res->spinlock); | |
47 | ||
48 | add_wait_queue(&res->wq, &wait); | |
49 | repeat: | |
50 | set_current_state(TASK_UNINTERRUPTIBLE); | |
51 | if (res->state & flags) { | |
52 | spin_unlock(&res->spinlock); | |
53 | schedule(); | |
54 | spin_lock(&res->spinlock); | |
55 | goto repeat; | |
56 | } | |
57 | remove_wait_queue(&res->wq, &wait); | |
5c2c9d38 | 58 | __set_current_state(TASK_RUNNING); |
6714d8e8 KH |
59 | } |
60 | ||
ba2bf218 | 61 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res) |
6714d8e8 KH |
62 | { |
63 | if (list_empty(&res->granted) && | |
64 | list_empty(&res->converting) && | |
ba2bf218 KH |
65 | list_empty(&res->blocked)) |
66 | return 0; | |
67 | return 1; | |
68 | } | |
69 | ||
70 | /* "unused": the lockres has no locks, is not on the dirty list, | |
71 | * has no inflight locks (in the gap between mastery and acquiring | |
72 | * the first lock), and has no bits in its refmap. | |
73 | * truly ready to be freed. */ | |
74 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | |
75 | { | |
a524812b WW |
76 | int bit; |
77 | ||
ff0a522e SM |
78 | assert_spin_locked(&res->spinlock); |
79 | ||
a524812b WW |
80 | if (__dlm_lockres_has_locks(res)) |
81 | return 0; | |
82 | ||
ff0a522e SM |
83 | /* Locks are in the process of being created */ |
84 | if (res->inflight_locks) | |
85 | return 0; | |
86 | ||
a524812b WW |
87 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
88 | return 0; | |
89 | ||
814ce694 JX |
90 | if (res->state & (DLM_LOCK_RES_RECOVERING| |
91 | DLM_LOCK_RES_RECOVERY_WAITING)) | |
a524812b WW |
92 | return 0; |
93 | ||
ff0a522e | 94 | /* Another node has this resource with this node as the master */ |
a524812b WW |
95 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); |
96 | if (bit < O2NM_MAX_NODES) | |
97 | return 0; | |
98 | ||
a524812b | 99 | return 1; |
6714d8e8 KH |
100 | } |
101 | ||
102 | ||
103 | /* Call whenever you may have added or deleted something from one of | |
104 | * the lockres queue's. This will figure out whether it belongs on the | |
105 | * unused list or not and does the appropriate thing. */ | |
106 | void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |
107 | struct dlm_lock_resource *res) | |
108 | { | |
6714d8e8 KH |
109 | assert_spin_locked(&dlm->spinlock); |
110 | assert_spin_locked(&res->spinlock); | |
111 | ||
112 | if (__dlm_lockres_unused(res)){ | |
113 | if (list_empty(&res->purge)) { | |
8e17d16f SM |
114 | mlog(0, "%s: Adding res %.*s to purge list\n", |
115 | dlm->name, res->lockname.len, res->lockname.name); | |
6714d8e8 KH |
116 | |
117 | res->last_used = jiffies; | |
ba2bf218 | 118 | dlm_lockres_get(res); |
6714d8e8 KH |
119 | list_add_tail(&res->purge, &dlm->purge_list); |
120 | dlm->purge_count++; | |
121 | } | |
122 | } else if (!list_empty(&res->purge)) { | |
8e17d16f SM |
123 | mlog(0, "%s: Removing res %.*s from purge list\n", |
124 | dlm->name, res->lockname.len, res->lockname.name); | |
6714d8e8 KH |
125 | |
126 | list_del_init(&res->purge); | |
ba2bf218 | 127 | dlm_lockres_put(res); |
6714d8e8 KH |
128 | dlm->purge_count--; |
129 | } | |
130 | } | |
131 | ||
132 | void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |
133 | struct dlm_lock_resource *res) | |
134 | { | |
6714d8e8 KH |
135 | spin_lock(&dlm->spinlock); |
136 | spin_lock(&res->spinlock); | |
137 | ||
138 | __dlm_lockres_calc_usage(dlm, res); | |
139 | ||
140 | spin_unlock(&res->spinlock); | |
141 | spin_unlock(&dlm->spinlock); | |
142 | } | |
143 | ||
ee8f7fcb | 144 | /* |
145 | * Do the real purge work: | |
146 | * unhash the lockres, and | |
147 | * clear flag DLM_LOCK_RES_DROPPING_REF. | |
148 | * It requires dlm and lockres spinlock to be taken. | |
149 | */ | |
150 | void __dlm_do_purge_lockres(struct dlm_ctxt *dlm, | |
151 | struct dlm_lock_resource *res) | |
152 | { | |
153 | assert_spin_locked(&dlm->spinlock); | |
154 | assert_spin_locked(&res->spinlock); | |
155 | ||
156 | if (!list_empty(&res->purge)) { | |
157 | mlog(0, "%s: Removing res %.*s from purgelist\n", | |
158 | dlm->name, res->lockname.len, res->lockname.name); | |
159 | list_del_init(&res->purge); | |
160 | dlm_lockres_put(res); | |
161 | dlm->purge_count--; | |
162 | } | |
163 | ||
164 | if (!__dlm_lockres_unused(res)) { | |
165 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", | |
166 | dlm->name, res->lockname.len, res->lockname.name); | |
167 | __dlm_print_one_lock_resource(res); | |
168 | BUG(); | |
169 | } | |
170 | ||
171 | __dlm_unhash_lockres(dlm, res); | |
172 | ||
173 | spin_lock(&dlm->track_lock); | |
174 | if (!list_empty(&res->tracking)) | |
175 | list_del_init(&res->tracking); | |
176 | else { | |
177 | mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n", | |
178 | dlm->name, res->lockname.len, res->lockname.name); | |
179 | __dlm_print_one_lock_resource(res); | |
180 | } | |
181 | spin_unlock(&dlm->track_lock); | |
182 | ||
183 | /* | |
184 | * lockres is not in the hash now. drop the flag and wake up | |
185 | * any processes waiting in dlm_get_lock_resource. | |
186 | */ | |
187 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | |
188 | } | |
189 | ||
7beaf243 | 190 | static void dlm_purge_lockres(struct dlm_ctxt *dlm, |
faf0ec9f | 191 | struct dlm_lock_resource *res) |
6714d8e8 KH |
192 | { |
193 | int master; | |
ba2bf218 | 194 | int ret = 0; |
6714d8e8 | 195 | |
7beaf243 SE |
196 | assert_spin_locked(&dlm->spinlock); |
197 | assert_spin_locked(&res->spinlock); | |
516b7e52 | 198 | |
ba2bf218 | 199 | master = (res->owner == dlm->node_num); |
516b7e52 | 200 | |
8e17d16f SM |
201 | mlog(0, "%s: Purging res %.*s, master %d\n", dlm->name, |
202 | res->lockname.len, res->lockname.name, master); | |
ba2bf218 KH |
203 | |
204 | if (!master) { | |
309e9191 | 205 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
ee8f7fcb | 206 | mlog(ML_NOTICE, "%s: res %.*s already in DLM_LOCK_RES_DROPPING_REF state\n", |
207 | dlm->name, res->lockname.len, res->lockname.name); | |
309e9191 | 208 | spin_unlock(&res->spinlock); |
209 | return; | |
210 | } | |
211 | ||
7beaf243 | 212 | res->state |= DLM_LOCK_RES_DROPPING_REF; |
c824c3c7 | 213 | /* drop spinlock... retake below */ |
7beaf243 | 214 | spin_unlock(&res->spinlock); |
c824c3c7 SM |
215 | spin_unlock(&dlm->spinlock); |
216 | ||
3b8118cf KH |
217 | spin_lock(&res->spinlock); |
218 | /* This ensures that clear refmap is sent after the set */ | |
7dc102b7 | 219 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); |
3b8118cf | 220 | spin_unlock(&res->spinlock); |
c824c3c7 | 221 | |
ba2bf218 KH |
222 | /* clear our bit from the master's refmap, ignore errors */ |
223 | ret = dlm_drop_lockres_ref(dlm, res); | |
224 | if (ret < 0) { | |
ba2bf218 KH |
225 | if (!dlm_is_host_down(ret)) |
226 | BUG(); | |
227 | } | |
ba2bf218 | 228 | spin_lock(&dlm->spinlock); |
7beaf243 | 229 | spin_lock(&res->spinlock); |
6714d8e8 KH |
230 | } |
231 | ||
ba2bf218 | 232 | if (!list_empty(&res->purge)) { |
8e17d16f SM |
233 | mlog(0, "%s: Removing res %.*s from purgelist, master %d\n", |
234 | dlm->name, res->lockname.len, res->lockname.name, master); | |
ba2bf218 KH |
235 | list_del_init(&res->purge); |
236 | dlm_lockres_put(res); | |
6714d8e8 | 237 | dlm->purge_count--; |
7beaf243 SE |
238 | } |
239 | ||
309e9191 | 240 | if (!master && ret == DLM_DEREF_RESPONSE_INPROG) { |
241 | mlog(0, "%s: deref %.*s in progress\n", | |
842b90b6 | 242 | dlm->name, res->lockname.len, res->lockname.name); |
243 | spin_unlock(&res->spinlock); | |
244 | return; | |
245 | } | |
246 | ||
7beaf243 | 247 | if (!__dlm_lockres_unused(res)) { |
8e17d16f | 248 | mlog(ML_ERROR, "%s: res %.*s in use after deref\n", |
7beaf243 SE |
249 | dlm->name, res->lockname.len, res->lockname.name); |
250 | __dlm_print_one_lock_resource(res); | |
251 | BUG(); | |
252 | } | |
83e32d90 | 253 | |
e9f0b6a6 | 254 | __dlm_unhash_lockres(dlm, res); |
6714d8e8 | 255 | |
f57a22dd YJ |
256 | spin_lock(&dlm->track_lock); |
257 | if (!list_empty(&res->tracking)) | |
258 | list_del_init(&res->tracking); | |
259 | else { | |
260 | mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", | |
261 | res->lockname.len, res->lockname.name); | |
262 | __dlm_print_one_lock_resource(res); | |
263 | } | |
264 | spin_unlock(&dlm->track_lock); | |
265 | ||
ba2bf218 KH |
266 | /* lockres is not in the hash now. drop the flag and wake up |
267 | * any processes waiting in dlm_get_lock_resource. */ | |
268 | if (!master) { | |
ba2bf218 KH |
269 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
270 | spin_unlock(&res->spinlock); | |
271 | wake_up(&res->wq); | |
7beaf243 SE |
272 | } else |
273 | spin_unlock(&res->spinlock); | |
8b219809 KH |
274 | } |
275 | ||
6714d8e8 KH |
276 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
277 | int purge_now) | |
278 | { | |
279 | unsigned int run_max, unused; | |
280 | unsigned long purge_jiffies; | |
281 | struct dlm_lock_resource *lockres; | |
282 | ||
283 | spin_lock(&dlm->spinlock); | |
284 | run_max = dlm->purge_count; | |
285 | ||
286 | while(run_max && !list_empty(&dlm->purge_list)) { | |
287 | run_max--; | |
288 | ||
289 | lockres = list_entry(dlm->purge_list.next, | |
290 | struct dlm_lock_resource, purge); | |
291 | ||
6714d8e8 | 292 | spin_lock(&lockres->spinlock); |
6714d8e8 KH |
293 | |
294 | purge_jiffies = lockres->last_used + | |
295 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); | |
296 | ||
297 | /* Make sure that we want to be processing this guy at | |
298 | * this time. */ | |
299 | if (!purge_now && time_after(purge_jiffies, jiffies)) { | |
300 | /* Since resources are added to the purge list | |
301 | * in tail order, we can stop at the first | |
302 | * unpurgable resource -- anyone added after | |
303 | * him will have a greater last_used value */ | |
7beaf243 | 304 | spin_unlock(&lockres->spinlock); |
6714d8e8 KH |
305 | break; |
306 | } | |
307 | ||
7beaf243 SE |
308 | /* Status of the lockres *might* change so double |
309 | * check. If the lockres is unused, holding the dlm | |
310 | * spinlock will prevent people from getting and more | |
311 | * refs on it. */ | |
312 | unused = __dlm_lockres_unused(lockres); | |
313 | if (!unused || | |
ac4fef4d X |
314 | (lockres->state & DLM_LOCK_RES_MIGRATING) || |
315 | (lockres->inflight_assert_workers != 0)) { | |
8e17d16f | 316 | mlog(0, "%s: res %.*s is in use or being remastered, " |
ac4fef4d X |
317 | "used %d, state %d, assert master workers %u\n", |
318 | dlm->name, lockres->lockname.len, | |
319 | lockres->lockname.name, | |
320 | !unused, lockres->state, | |
321 | lockres->inflight_assert_workers); | |
a270c6d3 | 322 | list_move_tail(&lockres->purge, &dlm->purge_list); |
7beaf243 SE |
323 | spin_unlock(&lockres->spinlock); |
324 | continue; | |
325 | } | |
326 | ||
78062cb2 | 327 | dlm_lockres_get(lockres); |
6714d8e8 | 328 | |
7beaf243 | 329 | dlm_purge_lockres(dlm, lockres); |
78062cb2 | 330 | |
3fca0894 | 331 | dlm_lockres_put(lockres); |
6714d8e8 KH |
332 | |
333 | /* Avoid adding any scheduling latencies */ | |
334 | cond_resched_lock(&dlm->spinlock); | |
335 | } | |
336 | ||
337 | spin_unlock(&dlm->spinlock); | |
338 | } | |
339 | ||
340 | static void dlm_shuffle_lists(struct dlm_ctxt *dlm, | |
341 | struct dlm_lock_resource *res) | |
342 | { | |
343 | struct dlm_lock *lock, *target; | |
6714d8e8 KH |
344 | int can_grant = 1; |
345 | ||
8e17d16f SM |
346 | /* |
347 | * Because this function is called with the lockres | |
6714d8e8 KH |
348 | * spinlock, and because we know that it is not migrating/ |
349 | * recovering/in-progress, it is fine to reserve asts and | |
8e17d16f SM |
350 | * basts right before queueing them all throughout |
351 | */ | |
d9ef7522 | 352 | assert_spin_locked(&dlm->ast_lock); |
6714d8e8 KH |
353 | assert_spin_locked(&res->spinlock); |
354 | BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| | |
355 | DLM_LOCK_RES_RECOVERING| | |
356 | DLM_LOCK_RES_IN_PROGRESS))); | |
357 | ||
358 | converting: | |
359 | if (list_empty(&res->converting)) | |
360 | goto blocked; | |
8e17d16f SM |
361 | mlog(0, "%s: res %.*s has locks on the convert queue\n", dlm->name, |
362 | res->lockname.len, res->lockname.name); | |
6714d8e8 KH |
363 | |
364 | target = list_entry(res->converting.next, struct dlm_lock, list); | |
365 | if (target->ml.convert_type == LKM_IVMODE) { | |
8e17d16f SM |
366 | mlog(ML_ERROR, "%s: res %.*s converting lock to invalid mode\n", |
367 | dlm->name, res->lockname.len, res->lockname.name); | |
6714d8e8 KH |
368 | BUG(); |
369 | } | |
df53cd3b | 370 | list_for_each_entry(lock, &res->granted, list) { |
6714d8e8 KH |
371 | if (lock==target) |
372 | continue; | |
373 | if (!dlm_lock_compatible(lock->ml.type, | |
374 | target->ml.convert_type)) { | |
375 | can_grant = 0; | |
376 | /* queue the BAST if not already */ | |
377 | if (lock->ml.highest_blocked == LKM_IVMODE) { | |
378 | __dlm_lockres_reserve_ast(res); | |
d9ef7522 | 379 | __dlm_queue_bast(dlm, lock); |
6714d8e8 KH |
380 | } |
381 | /* update the highest_blocked if needed */ | |
382 | if (lock->ml.highest_blocked < target->ml.convert_type) | |
383 | lock->ml.highest_blocked = | |
384 | target->ml.convert_type; | |
385 | } | |
386 | } | |
df53cd3b DF |
387 | |
388 | list_for_each_entry(lock, &res->converting, list) { | |
6714d8e8 KH |
389 | if (lock==target) |
390 | continue; | |
391 | if (!dlm_lock_compatible(lock->ml.type, | |
392 | target->ml.convert_type)) { | |
393 | can_grant = 0; | |
394 | if (lock->ml.highest_blocked == LKM_IVMODE) { | |
395 | __dlm_lockres_reserve_ast(res); | |
d9ef7522 | 396 | __dlm_queue_bast(dlm, lock); |
6714d8e8 KH |
397 | } |
398 | if (lock->ml.highest_blocked < target->ml.convert_type) | |
399 | lock->ml.highest_blocked = | |
400 | target->ml.convert_type; | |
401 | } | |
402 | } | |
403 | ||
404 | /* we can convert the lock */ | |
405 | if (can_grant) { | |
406 | spin_lock(&target->spinlock); | |
407 | BUG_ON(target->ml.highest_blocked != LKM_IVMODE); | |
408 | ||
8e17d16f SM |
409 | mlog(0, "%s: res %.*s, AST for Converting lock %u:%llu, type " |
410 | "%d => %d, node %u\n", dlm->name, res->lockname.len, | |
411 | res->lockname.name, | |
412 | dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), | |
413 | dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), | |
414 | target->ml.type, | |
6714d8e8 KH |
415 | target->ml.convert_type, target->ml.node); |
416 | ||
417 | target->ml.type = target->ml.convert_type; | |
418 | target->ml.convert_type = LKM_IVMODE; | |
f116629d | 419 | list_move_tail(&target->list, &res->granted); |
6714d8e8 KH |
420 | |
421 | BUG_ON(!target->lksb); | |
422 | target->lksb->status = DLM_NORMAL; | |
423 | ||
424 | spin_unlock(&target->spinlock); | |
425 | ||
426 | __dlm_lockres_reserve_ast(res); | |
d9ef7522 | 427 | __dlm_queue_ast(dlm, target); |
6714d8e8 KH |
428 | /* go back and check for more */ |
429 | goto converting; | |
430 | } | |
431 | ||
432 | blocked: | |
433 | if (list_empty(&res->blocked)) | |
434 | goto leave; | |
435 | target = list_entry(res->blocked.next, struct dlm_lock, list); | |
436 | ||
df53cd3b | 437 | list_for_each_entry(lock, &res->granted, list) { |
6714d8e8 KH |
438 | if (lock==target) |
439 | continue; | |
440 | if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { | |
441 | can_grant = 0; | |
442 | if (lock->ml.highest_blocked == LKM_IVMODE) { | |
443 | __dlm_lockres_reserve_ast(res); | |
d9ef7522 | 444 | __dlm_queue_bast(dlm, lock); |
6714d8e8 KH |
445 | } |
446 | if (lock->ml.highest_blocked < target->ml.type) | |
447 | lock->ml.highest_blocked = target->ml.type; | |
448 | } | |
449 | } | |
450 | ||
df53cd3b | 451 | list_for_each_entry(lock, &res->converting, list) { |
6714d8e8 KH |
452 | if (lock==target) |
453 | continue; | |
454 | if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { | |
455 | can_grant = 0; | |
456 | if (lock->ml.highest_blocked == LKM_IVMODE) { | |
457 | __dlm_lockres_reserve_ast(res); | |
d9ef7522 | 458 | __dlm_queue_bast(dlm, lock); |
6714d8e8 KH |
459 | } |
460 | if (lock->ml.highest_blocked < target->ml.type) | |
461 | lock->ml.highest_blocked = target->ml.type; | |
462 | } | |
463 | } | |
464 | ||
465 | /* we can grant the blocked lock (only | |
466 | * possible if converting list empty) */ | |
467 | if (can_grant) { | |
468 | spin_lock(&target->spinlock); | |
469 | BUG_ON(target->ml.highest_blocked != LKM_IVMODE); | |
470 | ||
8e17d16f SM |
471 | mlog(0, "%s: res %.*s, AST for Blocked lock %u:%llu, type %d, " |
472 | "node %u\n", dlm->name, res->lockname.len, | |
473 | res->lockname.name, | |
474 | dlm_get_lock_cookie_node(be64_to_cpu(target->ml.cookie)), | |
475 | dlm_get_lock_cookie_seq(be64_to_cpu(target->ml.cookie)), | |
6714d8e8 KH |
476 | target->ml.type, target->ml.node); |
477 | ||
8e17d16f | 478 | /* target->ml.type is already correct */ |
f116629d | 479 | list_move_tail(&target->list, &res->granted); |
6714d8e8 KH |
480 | |
481 | BUG_ON(!target->lksb); | |
482 | target->lksb->status = DLM_NORMAL; | |
483 | ||
484 | spin_unlock(&target->spinlock); | |
485 | ||
486 | __dlm_lockres_reserve_ast(res); | |
d9ef7522 | 487 | __dlm_queue_ast(dlm, target); |
6714d8e8 KH |
488 | /* go back and check for more */ |
489 | goto converting; | |
490 | } | |
491 | ||
492 | leave: | |
493 | return; | |
494 | } | |
495 | ||
496 | /* must have NO locks when calling this with res !=NULL * */ | |
497 | void dlm_kick_thread(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |
498 | { | |
6714d8e8 KH |
499 | if (res) { |
500 | spin_lock(&dlm->spinlock); | |
501 | spin_lock(&res->spinlock); | |
502 | __dlm_dirty_lockres(dlm, res); | |
503 | spin_unlock(&res->spinlock); | |
504 | spin_unlock(&dlm->spinlock); | |
505 | } | |
506 | wake_up(&dlm->dlm_thread_wq); | |
507 | } | |
508 | ||
509 | void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |
510 | { | |
6714d8e8 KH |
511 | assert_spin_locked(&dlm->spinlock); |
512 | assert_spin_locked(&res->spinlock); | |
513 | ||
514 | /* don't shuffle secondary queues */ | |
32c1b90d | 515 | if (res->owner == dlm->node_num) { |
ddc09c8d KH |
516 | if (res->state & (DLM_LOCK_RES_MIGRATING | |
517 | DLM_LOCK_RES_BLOCK_DIRTY)) | |
518 | return; | |
519 | ||
520 | if (list_empty(&res->dirty)) { | |
521 | /* ref for dirty_list */ | |
522 | dlm_lockres_get(res); | |
523 | list_add_tail(&res->dirty, &dlm->dirty_list); | |
524 | res->state |= DLM_LOCK_RES_DIRTY; | |
525 | } | |
6714d8e8 | 526 | } |
8e17d16f SM |
527 | |
528 | mlog(0, "%s: res %.*s\n", dlm->name, res->lockname.len, | |
529 | res->lockname.name); | |
6714d8e8 KH |
530 | } |
531 | ||
532 | ||
533 | /* Launch the NM thread for the mounted volume */ | |
534 | int dlm_launch_thread(struct dlm_ctxt *dlm) | |
535 | { | |
8e17d16f | 536 | mlog(0, "Starting dlm_thread...\n"); |
6714d8e8 | 537 | |
5afc44e2 JQ |
538 | dlm->dlm_thread_task = kthread_run(dlm_thread, dlm, "dlm-%s", |
539 | dlm->name); | |
6714d8e8 KH |
540 | if (IS_ERR(dlm->dlm_thread_task)) { |
541 | mlog_errno(PTR_ERR(dlm->dlm_thread_task)); | |
542 | dlm->dlm_thread_task = NULL; | |
543 | return -EINVAL; | |
544 | } | |
545 | ||
546 | return 0; | |
547 | } | |
548 | ||
549 | void dlm_complete_thread(struct dlm_ctxt *dlm) | |
550 | { | |
551 | if (dlm->dlm_thread_task) { | |
8e17d16f | 552 | mlog(ML_KTHREAD, "Waiting for dlm thread to exit\n"); |
6714d8e8 KH |
553 | kthread_stop(dlm->dlm_thread_task); |
554 | dlm->dlm_thread_task = NULL; | |
555 | } | |
556 | } | |
557 | ||
558 | static int dlm_dirty_list_empty(struct dlm_ctxt *dlm) | |
559 | { | |
560 | int empty; | |
561 | ||
562 | spin_lock(&dlm->spinlock); | |
563 | empty = list_empty(&dlm->dirty_list); | |
564 | spin_unlock(&dlm->spinlock); | |
565 | ||
566 | return empty; | |
567 | } | |
568 | ||
569 | static void dlm_flush_asts(struct dlm_ctxt *dlm) | |
570 | { | |
571 | int ret; | |
572 | struct dlm_lock *lock; | |
573 | struct dlm_lock_resource *res; | |
574 | u8 hi; | |
575 | ||
576 | spin_lock(&dlm->ast_lock); | |
577 | while (!list_empty(&dlm->pending_asts)) { | |
578 | lock = list_entry(dlm->pending_asts.next, | |
579 | struct dlm_lock, ast_list); | |
580 | /* get an extra ref on lock */ | |
581 | dlm_lock_get(lock); | |
582 | res = lock->lockres; | |
8e17d16f SM |
583 | mlog(0, "%s: res %.*s, Flush AST for lock %u:%llu, type %d, " |
584 | "node %u\n", dlm->name, res->lockname.len, | |
585 | res->lockname.name, | |
586 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | |
587 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | |
588 | lock->ml.type, lock->ml.node); | |
6714d8e8 KH |
589 | |
590 | BUG_ON(!lock->ast_pending); | |
591 | ||
592 | /* remove from list (including ref) */ | |
593 | list_del_init(&lock->ast_list); | |
594 | dlm_lock_put(lock); | |
595 | spin_unlock(&dlm->ast_lock); | |
596 | ||
597 | if (lock->ml.node != dlm->node_num) { | |
598 | ret = dlm_do_remote_ast(dlm, res, lock); | |
599 | if (ret < 0) | |
600 | mlog_errno(ret); | |
601 | } else | |
602 | dlm_do_local_ast(dlm, res, lock); | |
603 | ||
604 | spin_lock(&dlm->ast_lock); | |
605 | ||
606 | /* possible that another ast was queued while | |
607 | * we were delivering the last one */ | |
608 | if (!list_empty(&lock->ast_list)) { | |
8e17d16f SM |
609 | mlog(0, "%s: res %.*s, AST queued while flushing last " |
610 | "one\n", dlm->name, res->lockname.len, | |
611 | res->lockname.name); | |
6714d8e8 KH |
612 | } else |
613 | lock->ast_pending = 0; | |
614 | ||
615 | /* drop the extra ref. | |
616 | * this may drop it completely. */ | |
617 | dlm_lock_put(lock); | |
618 | dlm_lockres_release_ast(dlm, res); | |
619 | } | |
620 | ||
621 | while (!list_empty(&dlm->pending_basts)) { | |
622 | lock = list_entry(dlm->pending_basts.next, | |
623 | struct dlm_lock, bast_list); | |
624 | /* get an extra ref on lock */ | |
625 | dlm_lock_get(lock); | |
626 | res = lock->lockres; | |
627 | ||
628 | BUG_ON(!lock->bast_pending); | |
629 | ||
630 | /* get the highest blocked lock, and reset */ | |
631 | spin_lock(&lock->spinlock); | |
632 | BUG_ON(lock->ml.highest_blocked <= LKM_IVMODE); | |
633 | hi = lock->ml.highest_blocked; | |
634 | lock->ml.highest_blocked = LKM_IVMODE; | |
635 | spin_unlock(&lock->spinlock); | |
636 | ||
637 | /* remove from list (including ref) */ | |
638 | list_del_init(&lock->bast_list); | |
639 | dlm_lock_put(lock); | |
640 | spin_unlock(&dlm->ast_lock); | |
641 | ||
8e17d16f SM |
642 | mlog(0, "%s: res %.*s, Flush BAST for lock %u:%llu, " |
643 | "blocked %d, node %u\n", | |
644 | dlm->name, res->lockname.len, res->lockname.name, | |
645 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | |
646 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | |
647 | hi, lock->ml.node); | |
6714d8e8 KH |
648 | |
649 | if (lock->ml.node != dlm->node_num) { | |
650 | ret = dlm_send_proxy_bast(dlm, res, lock, hi); | |
651 | if (ret < 0) | |
652 | mlog_errno(ret); | |
653 | } else | |
654 | dlm_do_local_bast(dlm, res, lock, hi); | |
655 | ||
656 | spin_lock(&dlm->ast_lock); | |
657 | ||
658 | /* possible that another bast was queued while | |
659 | * we were delivering the last one */ | |
660 | if (!list_empty(&lock->bast_list)) { | |
8e17d16f SM |
661 | mlog(0, "%s: res %.*s, BAST queued while flushing last " |
662 | "one\n", dlm->name, res->lockname.len, | |
663 | res->lockname.name); | |
6714d8e8 KH |
664 | } else |
665 | lock->bast_pending = 0; | |
666 | ||
667 | /* drop the extra ref. | |
668 | * this may drop it completely. */ | |
669 | dlm_lock_put(lock); | |
670 | dlm_lockres_release_ast(dlm, res); | |
671 | } | |
672 | wake_up(&dlm->ast_wq); | |
673 | spin_unlock(&dlm->ast_lock); | |
674 | } | |
675 | ||
676 | ||
677 | #define DLM_THREAD_TIMEOUT_MS (4 * 1000) | |
678 | #define DLM_THREAD_MAX_DIRTY 100 | |
6714d8e8 KH |
679 | |
680 | static int dlm_thread(void *data) | |
681 | { | |
682 | struct dlm_lock_resource *res; | |
683 | struct dlm_ctxt *dlm = data; | |
684 | unsigned long timeout = msecs_to_jiffies(DLM_THREAD_TIMEOUT_MS); | |
685 | ||
686 | mlog(0, "dlm thread running for %s...\n", dlm->name); | |
687 | ||
688 | while (!kthread_should_stop()) { | |
689 | int n = DLM_THREAD_MAX_DIRTY; | |
690 | ||
691 | /* dlm_shutting_down is very point-in-time, but that | |
692 | * doesn't matter as we'll just loop back around if we | |
693 | * get false on the leading edge of a state | |
694 | * transition. */ | |
695 | dlm_run_purge_list(dlm, dlm_shutting_down(dlm)); | |
696 | ||
697 | /* We really don't want to hold dlm->spinlock while | |
698 | * calling dlm_shuffle_lists on each lockres that | |
699 | * needs to have its queues adjusted and AST/BASTs | |
700 | * run. So let's pull each entry off the dirty_list | |
701 | * and drop dlm->spinlock ASAP. Once off the list, | |
702 | * res->spinlock needs to be taken again to protect | |
703 | * the queues while calling dlm_shuffle_lists. */ | |
704 | spin_lock(&dlm->spinlock); | |
705 | while (!list_empty(&dlm->dirty_list)) { | |
706 | int delay = 0; | |
707 | res = list_entry(dlm->dirty_list.next, | |
708 | struct dlm_lock_resource, dirty); | |
709 | ||
710 | /* peel a lockres off, remove it from the list, | |
711 | * unset the dirty flag and drop the dlm lock */ | |
712 | BUG_ON(!res); | |
713 | dlm_lockres_get(res); | |
714 | ||
715 | spin_lock(&res->spinlock); | |
ddc09c8d | 716 | /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */ |
6714d8e8 KH |
717 | list_del_init(&res->dirty); |
718 | spin_unlock(&res->spinlock); | |
719 | spin_unlock(&dlm->spinlock); | |
6ff06a93 KH |
720 | /* Drop dirty_list ref */ |
721 | dlm_lockres_put(res); | |
6714d8e8 KH |
722 | |
723 | /* lockres can be re-dirtied/re-added to the | |
724 | * dirty_list in this gap, but that is ok */ | |
725 | ||
d9ef7522 | 726 | spin_lock(&dlm->ast_lock); |
6714d8e8 KH |
727 | spin_lock(&res->spinlock); |
728 | if (res->owner != dlm->node_num) { | |
729 | __dlm_print_one_lock_resource(res); | |
8e17d16f SM |
730 | mlog(ML_ERROR, "%s: inprog %d, mig %d, reco %d," |
731 | " dirty %d\n", dlm->name, | |
732 | !!(res->state & DLM_LOCK_RES_IN_PROGRESS), | |
733 | !!(res->state & DLM_LOCK_RES_MIGRATING), | |
734 | !!(res->state & DLM_LOCK_RES_RECOVERING), | |
735 | !!(res->state & DLM_LOCK_RES_DIRTY)); | |
6714d8e8 KH |
736 | } |
737 | BUG_ON(res->owner != dlm->node_num); | |
738 | ||
739 | /* it is now ok to move lockreses in these states | |
740 | * to the dirty list, assuming that they will only be | |
741 | * dirty for a short while. */ | |
ddc09c8d | 742 | BUG_ON(res->state & DLM_LOCK_RES_MIGRATING); |
6714d8e8 | 743 | if (res->state & (DLM_LOCK_RES_IN_PROGRESS | |
814ce694 JX |
744 | DLM_LOCK_RES_RECOVERING | |
745 | DLM_LOCK_RES_RECOVERY_WAITING)) { | |
6714d8e8 | 746 | /* move it to the tail and keep going */ |
ddc09c8d | 747 | res->state &= ~DLM_LOCK_RES_DIRTY; |
6714d8e8 | 748 | spin_unlock(&res->spinlock); |
d9ef7522 | 749 | spin_unlock(&dlm->ast_lock); |
8e17d16f SM |
750 | mlog(0, "%s: res %.*s, inprogress, delay list " |
751 | "shuffle, state %d\n", dlm->name, | |
6714d8e8 KH |
752 | res->lockname.len, res->lockname.name, |
753 | res->state); | |
754 | delay = 1; | |
755 | goto in_progress; | |
756 | } | |
757 | ||
758 | /* at this point the lockres is not migrating/ | |
759 | * recovering/in-progress. we have the lockres | |
760 | * spinlock and do NOT have the dlm lock. | |
761 | * safe to reserve/queue asts and run the lists. */ | |
762 | ||
6714d8e8 KH |
763 | /* called while holding lockres lock */ |
764 | dlm_shuffle_lists(dlm, res); | |
ddc09c8d | 765 | res->state &= ~DLM_LOCK_RES_DIRTY; |
6714d8e8 | 766 | spin_unlock(&res->spinlock); |
d9ef7522 | 767 | spin_unlock(&dlm->ast_lock); |
6714d8e8 KH |
768 | |
769 | dlm_lockres_calc_usage(dlm, res); | |
770 | ||
771 | in_progress: | |
772 | ||
773 | spin_lock(&dlm->spinlock); | |
774 | /* if the lock was in-progress, stick | |
775 | * it on the back of the list */ | |
776 | if (delay) { | |
777 | spin_lock(&res->spinlock); | |
ddc09c8d | 778 | __dlm_dirty_lockres(dlm, res); |
6714d8e8 KH |
779 | spin_unlock(&res->spinlock); |
780 | } | |
781 | dlm_lockres_put(res); | |
782 | ||
783 | /* unlikely, but we may need to give time to | |
784 | * other tasks */ | |
785 | if (!--n) { | |
8e17d16f SM |
786 | mlog(0, "%s: Throttling dlm thread\n", |
787 | dlm->name); | |
6714d8e8 KH |
788 | break; |
789 | } | |
790 | } | |
791 | ||
792 | spin_unlock(&dlm->spinlock); | |
793 | dlm_flush_asts(dlm); | |
794 | ||
795 | /* yield and continue right away if there is more work to do */ | |
796 | if (!n) { | |
f85cd47a | 797 | cond_resched(); |
6714d8e8 KH |
798 | continue; |
799 | } | |
800 | ||
801 | wait_event_interruptible_timeout(dlm->dlm_thread_wq, | |
802 | !dlm_dirty_list_empty(dlm) || | |
803 | kthread_should_stop(), | |
804 | timeout); | |
805 | } | |
806 | ||
807 | mlog(0, "quitting DLM thread\n"); | |
808 | return 0; | |
809 | } |