Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/ptlrpc/pinger.c | |
37 | * | |
38 | * Portal-RPC reconnection and replay operations, for use in recovery. | |
39 | */ | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_RPC | |
42 | ||
43 | #include <obd_support.h> | |
44 | #include <obd_class.h> | |
45 | #include "ptlrpc_internal.h" | |
46 | ||
47 | static int suppress_pings; | |
48 | CFS_MODULE_PARM(suppress_pings, "i", int, 0644, "Suppress pings"); | |
49 | ||
50 | struct mutex pinger_mutex; | |
51 | static LIST_HEAD(pinger_imports); | |
52 | static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list); | |
53 | ||
7d46a21a | 54 | int ptlrpc_pinger_suppress_pings(void) |
d7e09d03 PT |
55 | { |
56 | return suppress_pings; | |
57 | } | |
58 | EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings); | |
59 | ||
60 | struct ptlrpc_request * | |
61 | ptlrpc_prep_ping(struct obd_import *imp) | |
62 | { | |
63 | struct ptlrpc_request *req; | |
64 | ||
65 | req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, | |
66 | LUSTRE_OBD_VERSION, OBD_PING); | |
67 | if (req) { | |
68 | ptlrpc_request_set_replen(req); | |
69 | req->rq_no_resend = req->rq_no_delay = 1; | |
70 | } | |
71 | return req; | |
72 | } | |
73 | ||
74 | int ptlrpc_obd_ping(struct obd_device *obd) | |
75 | { | |
76 | int rc; | |
77 | struct ptlrpc_request *req; | |
d7e09d03 PT |
78 | |
79 | req = ptlrpc_prep_ping(obd->u.cli.cl_import); | |
80 | if (req == NULL) | |
0a3bdb00 | 81 | return -ENOMEM; |
d7e09d03 PT |
82 | |
83 | req->rq_send_state = LUSTRE_IMP_FULL; | |
84 | ||
85 | rc = ptlrpc_queue_wait(req); | |
86 | ||
87 | ptlrpc_req_finished(req); | |
88 | ||
0a3bdb00 | 89 | return rc; |
d7e09d03 PT |
90 | } |
91 | EXPORT_SYMBOL(ptlrpc_obd_ping); | |
92 | ||
93 | int ptlrpc_ping(struct obd_import *imp) | |
94 | { | |
95 | struct ptlrpc_request *req; | |
d7e09d03 PT |
96 | |
97 | req = ptlrpc_prep_ping(imp); | |
98 | if (req == NULL) { | |
99 | CERROR("OOM trying to ping %s->%s\n", | |
100 | imp->imp_obd->obd_uuid.uuid, | |
101 | obd2cli_tgt(imp->imp_obd)); | |
0a3bdb00 | 102 | return -ENOMEM; |
d7e09d03 PT |
103 | } |
104 | ||
105 | DEBUG_REQ(D_INFO, req, "pinging %s->%s", | |
106 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
107 | ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); | |
108 | ||
0a3bdb00 | 109 | return 0; |
d7e09d03 PT |
110 | } |
111 | ||
112 | void ptlrpc_update_next_ping(struct obd_import *imp, int soon) | |
113 | { | |
114 | int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL; | |
115 | if (imp->imp_state == LUSTRE_IMP_DISCON) { | |
116 | int dtime = max_t(int, CONNECTION_SWITCH_MIN, | |
117 | AT_OFF ? 0 : | |
118 | at_get(&imp->imp_at.iat_net_latency)); | |
119 | time = min(time, dtime); | |
120 | } | |
121 | imp->imp_next_ping = cfs_time_shift(time); | |
122 | } | |
123 | ||
124 | void ptlrpc_ping_import_soon(struct obd_import *imp) | |
125 | { | |
126 | imp->imp_next_ping = cfs_time_current(); | |
127 | } | |
128 | ||
129 | static inline int imp_is_deactive(struct obd_import *imp) | |
130 | { | |
131 | return (imp->imp_deactive || | |
132 | OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE)); | |
133 | } | |
134 | ||
135 | static inline int ptlrpc_next_reconnect(struct obd_import *imp) | |
136 | { | |
137 | if (imp->imp_server_timeout) | |
138 | return cfs_time_shift(obd_timeout / 2); | |
139 | else | |
140 | return cfs_time_shift(obd_timeout); | |
141 | } | |
142 | ||
143 | static atomic_t suspend_timeouts = ATOMIC_INIT(0); | |
144 | static cfs_time_t suspend_wakeup_time = 0; | |
145 | ||
146 | cfs_duration_t pinger_check_timeout(cfs_time_t time) | |
147 | { | |
148 | struct timeout_item *item; | |
149 | cfs_time_t timeout = PING_INTERVAL; | |
150 | ||
151 | /* The timeout list is a increase order sorted list */ | |
152 | mutex_lock(&pinger_mutex); | |
153 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
154 | int ti_timeout = item->ti_timeout; | |
155 | if (timeout > ti_timeout) | |
156 | timeout = ti_timeout; | |
157 | break; | |
158 | } | |
159 | mutex_unlock(&pinger_mutex); | |
160 | ||
161 | return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)), | |
162 | cfs_time_current()); | |
163 | } | |
164 | ||
165 | static wait_queue_head_t suspend_timeouts_waitq; | |
166 | ||
167 | cfs_time_t ptlrpc_suspend_wakeup_time(void) | |
168 | { | |
169 | return suspend_wakeup_time; | |
170 | } | |
171 | ||
/**
 * Currently a no-op: the implementation is compiled out with "#if 0".
 */
void ptlrpc_deactivate_timeouts(struct obd_import *imp)
{
	/* XXX: disabled for now, will be replaced by adaptive timeouts */
#if 0
	if (!imp->imp_no_timeout) {
		imp->imp_no_timeout = 1;
		atomic_inc(&suspend_timeouts);
		CDEBUG(D_HA|D_WARNING, "deactivate timeouts %u\n",
		       atomic_read(&suspend_timeouts));
	}
#endif
}
184 | ||
/**
 * Currently a no-op: the implementation is compiled out with "#if 0".
 */
void ptlrpc_activate_timeouts(struct obd_import *imp)
{
	/* XXX: disabled for now, will be replaced by adaptive timeouts */
#if 0
	if (imp->imp_no_timeout) {
		imp->imp_no_timeout = 0;
		LASSERT(atomic_read(&suspend_timeouts) > 0);
		if (atomic_dec_and_test(&suspend_timeouts)) {
			suspend_wakeup_time = cfs_time_current();
			wake_up(&suspend_timeouts_waitq);
		}
		CDEBUG(D_HA|D_WARNING, "activate timeouts %u\n",
		       atomic_read(&suspend_timeouts));
	}
#endif
}
201 | ||
202 | int ptlrpc_check_suspend(void) | |
203 | { | |
204 | if (atomic_read(&suspend_timeouts)) | |
205 | return 1; | |
206 | return 0; | |
207 | } | |
208 | ||
209 | int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req) | |
210 | { | |
211 | struct l_wait_info lwi; | |
212 | ||
213 | if (atomic_read(&suspend_timeouts)) { | |
214 | DEBUG_REQ(D_NET, req, "-- suspend %d regular timeout", | |
215 | atomic_read(&suspend_timeouts)); | |
216 | lwi = LWI_INTR(NULL, NULL); | |
217 | l_wait_event(suspend_timeouts_waitq, | |
218 | atomic_read(&suspend_timeouts) == 0, &lwi); | |
219 | DEBUG_REQ(D_NET, req, "-- recharge regular timeout"); | |
220 | return 1; | |
221 | } | |
222 | return 0; | |
223 | } | |
224 | ||
225 | ||
226 | static bool ir_up; | |
227 | ||
228 | void ptlrpc_pinger_ir_up(void) | |
229 | { | |
230 | CDEBUG(D_HA, "IR up\n"); | |
231 | ir_up = true; | |
232 | } | |
233 | EXPORT_SYMBOL(ptlrpc_pinger_ir_up); | |
234 | ||
235 | void ptlrpc_pinger_ir_down(void) | |
236 | { | |
237 | CDEBUG(D_HA, "IR down\n"); | |
238 | ir_up = false; | |
239 | } | |
240 | EXPORT_SYMBOL(ptlrpc_pinger_ir_down); | |
241 | ||
242 | static void ptlrpc_pinger_process_import(struct obd_import *imp, | |
243 | unsigned long this_ping) | |
244 | { | |
245 | int level; | |
246 | int force; | |
247 | int force_next; | |
248 | int suppress; | |
249 | ||
250 | spin_lock(&imp->imp_lock); | |
251 | ||
252 | level = imp->imp_state; | |
253 | force = imp->imp_force_verify; | |
254 | force_next = imp->imp_force_next_verify; | |
255 | /* | |
256 | * This will be used below only if the import is "FULL". | |
257 | */ | |
258 | suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS); | |
259 | ||
260 | imp->imp_force_verify = 0; | |
261 | ||
262 | if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) && | |
263 | !force) { | |
264 | spin_unlock(&imp->imp_lock); | |
265 | return; | |
266 | } | |
267 | ||
268 | imp->imp_force_next_verify = 0; | |
269 | ||
270 | spin_unlock(&imp->imp_lock); | |
271 | ||
272 | CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u " | |
273 | "force %u force_next %u deactive %u pingable %u suppress %u\n", | |
274 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), | |
275 | ptlrpc_import_state_name(level), level, force, force_next, | |
276 | imp->imp_deactive, imp->imp_pingable, suppress); | |
277 | ||
278 | if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) { | |
279 | /* wait for a while before trying recovery again */ | |
280 | imp->imp_next_ping = ptlrpc_next_reconnect(imp); | |
281 | if (!imp->imp_no_pinger_recover) | |
282 | ptlrpc_initiate_recovery(imp); | |
283 | } else if (level != LUSTRE_IMP_FULL || | |
284 | imp->imp_obd->obd_no_recov || | |
285 | imp_is_deactive(imp)) { | |
286 | CDEBUG(D_HA, "%s->%s: not pinging (in recovery " | |
287 | "or recovery disabled: %s)\n", | |
288 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), | |
289 | ptlrpc_import_state_name(level)); | |
290 | } else if ((imp->imp_pingable && !suppress) || force_next || force) { | |
291 | ptlrpc_ping(imp); | |
292 | } | |
293 | } | |
294 | ||
295 | static int ptlrpc_pinger_main(void *arg) | |
296 | { | |
297 | struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; | |
d7e09d03 PT |
298 | |
299 | /* Record that the thread is running */ | |
300 | thread_set_flags(thread, SVC_RUNNING); | |
301 | wake_up(&thread->t_ctl_waitq); | |
302 | ||
303 | /* And now, loop forever, pinging as needed. */ | |
304 | while (1) { | |
305 | cfs_time_t this_ping = cfs_time_current(); | |
306 | struct l_wait_info lwi; | |
307 | cfs_duration_t time_to_next_wake; | |
308 | struct timeout_item *item; | |
309 | struct list_head *iter; | |
310 | ||
311 | mutex_lock(&pinger_mutex); | |
312 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
313 | item->ti_cb(item, item->ti_cb_data); | |
314 | } | |
315 | list_for_each(iter, &pinger_imports) { | |
316 | struct obd_import *imp = | |
317 | list_entry(iter, struct obd_import, | |
318 | imp_pinger_chain); | |
319 | ||
320 | ptlrpc_pinger_process_import(imp, this_ping); | |
321 | /* obd_timeout might have changed */ | |
322 | if (imp->imp_pingable && imp->imp_next_ping && | |
323 | cfs_time_after(imp->imp_next_ping, | |
324 | cfs_time_add(this_ping, | |
325 | cfs_time_seconds(PING_INTERVAL)))) | |
326 | ptlrpc_update_next_ping(imp, 0); | |
327 | } | |
328 | mutex_unlock(&pinger_mutex); | |
329 | /* update memory usage info */ | |
330 | obd_update_maxusage(); | |
331 | ||
332 | /* Wait until the next ping time, or until we're stopped. */ | |
333 | time_to_next_wake = pinger_check_timeout(this_ping); | |
334 | /* The ping sent by ptlrpc_send_rpc may get sent out | |
335 | say .01 second after this. | |
336 | ptlrpc_pinger_sending_on_import will then set the | |
337 | next ping time to next_ping + .01 sec, which means | |
338 | we will SKIP the next ping at next_ping, and the | |
339 | ping will get sent 2 timeouts from now! Beware. */ | |
340 | CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" (" | |
341 | CFS_TIME_T")\n", time_to_next_wake, | |
342 | cfs_time_add(this_ping,cfs_time_seconds(PING_INTERVAL))); | |
343 | if (time_to_next_wake > 0) { | |
344 | lwi = LWI_TIMEOUT(max_t(cfs_duration_t, | |
345 | time_to_next_wake, | |
346 | cfs_time_seconds(1)), | |
347 | NULL, NULL); | |
348 | l_wait_event(thread->t_ctl_waitq, | |
349 | thread_is_stopping(thread) || | |
350 | thread_is_event(thread), | |
351 | &lwi); | |
352 | if (thread_test_and_clear_flags(thread, SVC_STOPPING)) { | |
d7e09d03 PT |
353 | break; |
354 | } else { | |
355 | /* woken after adding import to reset timer */ | |
356 | thread_test_and_clear_flags(thread, SVC_EVENT); | |
357 | } | |
358 | } | |
359 | } | |
360 | ||
361 | thread_set_flags(thread, SVC_STOPPED); | |
362 | wake_up(&thread->t_ctl_waitq); | |
363 | ||
364 | CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid()); | |
365 | return 0; | |
366 | } | |
367 | ||
20802057 | 368 | static struct ptlrpc_thread pinger_thread; |
d7e09d03 PT |
369 | |
370 | int ptlrpc_start_pinger(void) | |
371 | { | |
372 | struct l_wait_info lwi = { 0 }; | |
373 | int rc; | |
d7e09d03 | 374 | |
20802057 DE |
375 | if (!thread_is_init(&pinger_thread) && |
376 | !thread_is_stopped(&pinger_thread)) | |
0a3bdb00 | 377 | return -EALREADY; |
d7e09d03 | 378 | |
20802057 | 379 | init_waitqueue_head(&pinger_thread.t_ctl_waitq); |
d7e09d03 PT |
380 | init_waitqueue_head(&suspend_timeouts_waitq); |
381 | ||
20802057 | 382 | strcpy(pinger_thread.t_name, "ll_ping"); |
d7e09d03 PT |
383 | |
384 | /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we | |
385 | * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */ | |
386 | rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, | |
20802057 | 387 | &pinger_thread, pinger_thread.t_name)); |
d7e09d03 PT |
388 | if (IS_ERR_VALUE(rc)) { |
389 | CERROR("cannot start thread: %d\n", rc); | |
0a3bdb00 | 390 | return rc; |
d7e09d03 | 391 | } |
20802057 DE |
392 | l_wait_event(pinger_thread.t_ctl_waitq, |
393 | thread_is_running(&pinger_thread), &lwi); | |
d7e09d03 PT |
394 | |
395 | if (suppress_pings) | |
396 | CWARN("Pings will be suppressed at the request of the " | |
397 | "administrator. The configuration shall meet the " | |
398 | "additional requirements described in the manual. " | |
399 | "(Search for the \"suppress_pings\" kernel module " | |
400 | "parameter.)\n"); | |
401 | ||
0a3bdb00 | 402 | return 0; |
d7e09d03 PT |
403 | } |
404 | ||
405 | int ptlrpc_pinger_remove_timeouts(void); | |
406 | ||
407 | int ptlrpc_stop_pinger(void) | |
408 | { | |
409 | struct l_wait_info lwi = { 0 }; | |
410 | int rc = 0; | |
d7e09d03 | 411 | |
20802057 DE |
412 | if (!thread_is_init(&pinger_thread) && |
413 | !thread_is_stopped(&pinger_thread)) | |
0a3bdb00 | 414 | return -EALREADY; |
d7e09d03 PT |
415 | |
416 | ptlrpc_pinger_remove_timeouts(); | |
20802057 DE |
417 | thread_set_flags(&pinger_thread, SVC_STOPPING); |
418 | wake_up(&pinger_thread.t_ctl_waitq); | |
d7e09d03 | 419 | |
20802057 DE |
420 | l_wait_event(pinger_thread.t_ctl_waitq, |
421 | thread_is_stopped(&pinger_thread), &lwi); | |
d7e09d03 | 422 | |
0a3bdb00 | 423 | return rc; |
d7e09d03 PT |
424 | } |
425 | ||
/**
 * Push the next ping of \a imp out by the regular (non-"soon") interval,
 * via ptlrpc_update_next_ping().
 */
void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
{
	const int soon = 0;

	ptlrpc_update_next_ping(imp, soon);
}
EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
431 | ||
432 | void ptlrpc_pinger_commit_expected(struct obd_import *imp) | |
433 | { | |
434 | ptlrpc_update_next_ping(imp, 1); | |
435 | LASSERT(spin_is_locked(&imp->imp_lock)); | |
436 | /* | |
437 | * Avoid reading stale imp_connect_data. When not sure if pings are | |
438 | * expected or not on next connection, we assume they are not and force | |
439 | * one anyway to guarantee the chance of updating | |
440 | * imp_peer_committed_transno. | |
441 | */ | |
442 | if (imp->imp_state != LUSTRE_IMP_FULL || | |
443 | OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS)) | |
444 | imp->imp_force_next_verify = 1; | |
445 | } | |
446 | ||
447 | int ptlrpc_pinger_add_import(struct obd_import *imp) | |
448 | { | |
d7e09d03 | 449 | if (!list_empty(&imp->imp_pinger_chain)) |
0a3bdb00 | 450 | return -EALREADY; |
d7e09d03 PT |
451 | |
452 | mutex_lock(&pinger_mutex); | |
453 | CDEBUG(D_HA, "adding pingable import %s->%s\n", | |
454 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
455 | /* if we add to pinger we want recovery on this import */ | |
456 | imp->imp_obd->obd_no_recov = 0; | |
457 | ptlrpc_update_next_ping(imp, 0); | |
458 | /* XXX sort, blah blah */ | |
459 | list_add_tail(&imp->imp_pinger_chain, &pinger_imports); | |
460 | class_import_get(imp); | |
461 | ||
462 | ptlrpc_pinger_wake_up(); | |
463 | mutex_unlock(&pinger_mutex); | |
464 | ||
0a3bdb00 | 465 | return 0; |
d7e09d03 PT |
466 | } |
467 | EXPORT_SYMBOL(ptlrpc_pinger_add_import); | |
468 | ||
469 | int ptlrpc_pinger_del_import(struct obd_import *imp) | |
470 | { | |
d7e09d03 | 471 | if (list_empty(&imp->imp_pinger_chain)) |
0a3bdb00 | 472 | return -ENOENT; |
d7e09d03 PT |
473 | |
474 | mutex_lock(&pinger_mutex); | |
475 | list_del_init(&imp->imp_pinger_chain); | |
476 | CDEBUG(D_HA, "removing pingable import %s->%s\n", | |
477 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
478 | /* if we remove from pinger we don't want recovery on this import */ | |
479 | imp->imp_obd->obd_no_recov = 1; | |
480 | class_import_put(imp); | |
481 | mutex_unlock(&pinger_mutex); | |
0a3bdb00 | 482 | return 0; |
d7e09d03 PT |
483 | } |
484 | EXPORT_SYMBOL(ptlrpc_pinger_del_import); | |
485 | ||
486 | /** | |
487 | * Register a timeout callback to the pinger list, and the callback will | |
488 | * be called when timeout happens. | |
489 | */ | |
490 | struct timeout_item* ptlrpc_new_timeout(int time, enum timeout_event event, | |
491 | timeout_cb_t cb, void *data) | |
492 | { | |
493 | struct timeout_item *ti; | |
494 | ||
495 | OBD_ALLOC_PTR(ti); | |
496 | if (!ti) | |
497 | return(NULL); | |
498 | ||
499 | INIT_LIST_HEAD(&ti->ti_obd_list); | |
500 | INIT_LIST_HEAD(&ti->ti_chain); | |
501 | ti->ti_timeout = time; | |
502 | ti->ti_event = event; | |
503 | ti->ti_cb = cb; | |
504 | ti->ti_cb_data = data; | |
505 | ||
506 | return ti; | |
507 | } | |
508 | ||
509 | /** | |
510 | * Register timeout event on the the pinger thread. | |
511 | * Note: the timeout list is an sorted list with increased timeout value. | |
512 | */ | |
513 | static struct timeout_item* | |
514 | ptlrpc_pinger_register_timeout(int time, enum timeout_event event, | |
515 | timeout_cb_t cb, void *data) | |
516 | { | |
517 | struct timeout_item *item, *tmp; | |
518 | ||
519 | LASSERT(mutex_is_locked(&pinger_mutex)); | |
520 | ||
521 | list_for_each_entry(item, &timeout_list, ti_chain) | |
522 | if (item->ti_event == event) | |
523 | goto out; | |
524 | ||
525 | item = ptlrpc_new_timeout(time, event, cb, data); | |
526 | if (item) { | |
527 | list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) { | |
528 | if (tmp->ti_timeout < time) { | |
529 | list_add(&item->ti_chain, &tmp->ti_chain); | |
530 | goto out; | |
531 | } | |
532 | } | |
533 | list_add(&item->ti_chain, &timeout_list); | |
534 | } | |
535 | out: | |
536 | return item; | |
537 | } | |
538 | ||
539 | /* Add a client_obd to the timeout event list, when timeout(@time) | |
540 | * happens, the callback(@cb) will be called. | |
541 | */ | |
542 | int ptlrpc_add_timeout_client(int time, enum timeout_event event, | |
543 | timeout_cb_t cb, void *data, | |
544 | struct list_head *obd_list) | |
545 | { | |
546 | struct timeout_item *ti; | |
547 | ||
548 | mutex_lock(&pinger_mutex); | |
549 | ti = ptlrpc_pinger_register_timeout(time, event, cb, data); | |
550 | if (!ti) { | |
551 | mutex_unlock(&pinger_mutex); | |
552 | return (-EINVAL); | |
553 | } | |
554 | list_add(obd_list, &ti->ti_obd_list); | |
555 | mutex_unlock(&pinger_mutex); | |
556 | return 0; | |
557 | } | |
558 | EXPORT_SYMBOL(ptlrpc_add_timeout_client); | |
559 | ||
560 | int ptlrpc_del_timeout_client(struct list_head *obd_list, | |
561 | enum timeout_event event) | |
562 | { | |
563 | struct timeout_item *ti = NULL, *item; | |
564 | ||
565 | if (list_empty(obd_list)) | |
566 | return 0; | |
567 | mutex_lock(&pinger_mutex); | |
568 | list_del_init(obd_list); | |
569 | /** | |
570 | * If there are no obd attached to the timeout event | |
571 | * list, remove this timeout event from the pinger | |
572 | */ | |
573 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
574 | if (item->ti_event == event) { | |
575 | ti = item; | |
576 | break; | |
577 | } | |
578 | } | |
579 | LASSERTF(ti != NULL, "ti is NULL ! \n"); | |
580 | if (list_empty(&ti->ti_obd_list)) { | |
581 | list_del(&ti->ti_chain); | |
582 | OBD_FREE_PTR(ti); | |
583 | } | |
584 | mutex_unlock(&pinger_mutex); | |
585 | return 0; | |
586 | } | |
587 | EXPORT_SYMBOL(ptlrpc_del_timeout_client); | |
588 | ||
589 | int ptlrpc_pinger_remove_timeouts(void) | |
590 | { | |
591 | struct timeout_item *item, *tmp; | |
592 | ||
593 | mutex_lock(&pinger_mutex); | |
594 | list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) { | |
595 | LASSERT(list_empty(&item->ti_obd_list)); | |
596 | list_del(&item->ti_chain); | |
597 | OBD_FREE_PTR(item); | |
598 | } | |
599 | mutex_unlock(&pinger_mutex); | |
600 | return 0; | |
601 | } | |
602 | ||
7d46a21a | 603 | void ptlrpc_pinger_wake_up(void) |
d7e09d03 | 604 | { |
20802057 DE |
605 | thread_add_flags(&pinger_thread, SVC_EVENT); |
606 | wake_up(&pinger_thread.t_ctl_waitq); | |
d7e09d03 PT |
607 | } |
608 | ||
609 | /* Ping evictor thread */ | |
610 | #define PET_READY 1 | |
611 | #define PET_TERMINATE 2 | |
612 | ||
613 | static int pet_refcount = 0; | |
614 | static int pet_state; | |
615 | static wait_queue_head_t pet_waitq; | |
616 | LIST_HEAD(pet_list); | |
617 | static DEFINE_SPINLOCK(pet_lock); | |
618 | ||
619 | int ping_evictor_wake(struct obd_export *exp) | |
620 | { | |
621 | struct obd_device *obd; | |
622 | ||
623 | spin_lock(&pet_lock); | |
624 | if (pet_state != PET_READY) { | |
625 | /* eventually the new obd will call here again. */ | |
626 | spin_unlock(&pet_lock); | |
627 | return 1; | |
628 | } | |
629 | ||
630 | obd = class_exp2obd(exp); | |
631 | if (list_empty(&obd->obd_evict_list)) { | |
632 | class_incref(obd, "evictor", obd); | |
633 | list_add(&obd->obd_evict_list, &pet_list); | |
634 | } | |
635 | spin_unlock(&pet_lock); | |
636 | ||
637 | wake_up(&pet_waitq); | |
638 | return 0; | |
639 | } | |
640 | ||
641 | static int ping_evictor_main(void *arg) | |
642 | { | |
643 | struct obd_device *obd; | |
644 | struct obd_export *exp; | |
645 | struct l_wait_info lwi = { 0 }; | |
646 | time_t expire_time; | |
d7e09d03 PT |
647 | |
648 | unshare_fs_struct(); | |
649 | ||
650 | CDEBUG(D_HA, "Starting Ping Evictor\n"); | |
651 | pet_state = PET_READY; | |
652 | while (1) { | |
653 | l_wait_event(pet_waitq, (!list_empty(&pet_list)) || | |
654 | (pet_state == PET_TERMINATE), &lwi); | |
655 | ||
656 | /* loop until all obd's will be removed */ | |
657 | if ((pet_state == PET_TERMINATE) && list_empty(&pet_list)) | |
658 | break; | |
659 | ||
660 | /* we only get here if pet_exp != NULL, and the end of this | |
661 | * loop is the only place which sets it NULL again, so lock | |
662 | * is not strictly necessary. */ | |
663 | spin_lock(&pet_lock); | |
664 | obd = list_entry(pet_list.next, struct obd_device, | |
665 | obd_evict_list); | |
666 | spin_unlock(&pet_lock); | |
667 | ||
668 | expire_time = cfs_time_current_sec() - PING_EVICT_TIMEOUT; | |
669 | ||
670 | CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n", | |
671 | obd->obd_name, expire_time); | |
672 | ||
673 | /* Exports can't be deleted out of the list while we hold | |
674 | * the obd lock (class_unlink_export), which means we can't | |
675 | * lose the last ref on the export. If they've already been | |
676 | * removed from the list, we won't find them here. */ | |
677 | spin_lock(&obd->obd_dev_lock); | |
678 | while (!list_empty(&obd->obd_exports_timed)) { | |
679 | exp = list_entry(obd->obd_exports_timed.next, | |
680 | struct obd_export, | |
681 | exp_obd_chain_timed); | |
682 | if (expire_time > exp->exp_last_request_time) { | |
683 | class_export_get(exp); | |
684 | spin_unlock(&obd->obd_dev_lock); | |
685 | LCONSOLE_WARN("%s: haven't heard from client %s" | |
686 | " (at %s) in %ld seconds. I think" | |
687 | " it's dead, and I am evicting" | |
688 | " it. exp %p, cur %ld expire %ld" | |
689 | " last %ld\n", | |
690 | obd->obd_name, | |
691 | obd_uuid2str(&exp->exp_client_uuid), | |
692 | obd_export_nid2str(exp), | |
693 | (long)(cfs_time_current_sec() - | |
694 | exp->exp_last_request_time), | |
695 | exp, (long)cfs_time_current_sec(), | |
696 | (long)expire_time, | |
697 | (long)exp->exp_last_request_time); | |
698 | CDEBUG(D_HA, "Last request was at %ld\n", | |
699 | exp->exp_last_request_time); | |
700 | class_fail_export(exp); | |
701 | class_export_put(exp); | |
702 | spin_lock(&obd->obd_dev_lock); | |
703 | } else { | |
704 | /* List is sorted, so everyone below is ok */ | |
705 | break; | |
706 | } | |
707 | } | |
708 | spin_unlock(&obd->obd_dev_lock); | |
709 | ||
710 | spin_lock(&pet_lock); | |
711 | list_del_init(&obd->obd_evict_list); | |
712 | spin_unlock(&pet_lock); | |
713 | ||
714 | class_decref(obd, "evictor", obd); | |
715 | } | |
716 | CDEBUG(D_HA, "Exiting Ping Evictor\n"); | |
717 | ||
0a3bdb00 | 718 | return 0; |
d7e09d03 PT |
719 | } |
720 | ||
721 | void ping_evictor_start(void) | |
722 | { | |
68b636b6 | 723 | struct task_struct *task; |
d7e09d03 PT |
724 | |
725 | if (++pet_refcount > 1) | |
726 | return; | |
727 | ||
728 | init_waitqueue_head(&pet_waitq); | |
729 | ||
730 | task = kthread_run(ping_evictor_main, NULL, "ll_evictor"); | |
731 | if (IS_ERR(task)) { | |
732 | pet_refcount--; | |
733 | CERROR("Cannot start ping evictor thread: %ld\n", | |
734 | PTR_ERR(task)); | |
735 | } | |
736 | } | |
737 | EXPORT_SYMBOL(ping_evictor_start); | |
738 | ||
739 | void ping_evictor_stop(void) | |
740 | { | |
741 | if (--pet_refcount > 0) | |
742 | return; | |
743 | ||
744 | pet_state = PET_TERMINATE; | |
745 | wake_up(&pet_waitq); | |
746 | } | |
747 | EXPORT_SYMBOL(ping_evictor_stop); |