Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/ptlrpc/pinger.c | |
37 | * | |
38 | * Portal-RPC reconnection and replay operations, for use in recovery. | |
39 | */ | |
40 | ||
41 | #define DEBUG_SUBSYSTEM S_RPC | |
42 | ||
e27db149 GKH |
43 | #include "../include/obd_support.h" |
44 | #include "../include/obd_class.h" | |
d7e09d03 PT |
45 | #include "ptlrpc_internal.h" |
46 | ||
47 | static int suppress_pings; | |
8cc7b4b9 PT |
48 | module_param(suppress_pings, int, 0644); |
49 | MODULE_PARM_DESC(suppress_pings, "Suppress pings"); | |
d7e09d03 PT |
50 | |
51 | struct mutex pinger_mutex; | |
52 | static LIST_HEAD(pinger_imports); | |
53 | static struct list_head timeout_list = LIST_HEAD_INIT(timeout_list); | |
54 | ||
7d46a21a | 55 | int ptlrpc_pinger_suppress_pings(void) |
d7e09d03 PT |
56 | { |
57 | return suppress_pings; | |
58 | } | |
59 | EXPORT_SYMBOL(ptlrpc_pinger_suppress_pings); | |
60 | ||
61 | struct ptlrpc_request * | |
62 | ptlrpc_prep_ping(struct obd_import *imp) | |
63 | { | |
64 | struct ptlrpc_request *req; | |
65 | ||
66 | req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING, | |
67 | LUSTRE_OBD_VERSION, OBD_PING); | |
68 | if (req) { | |
69 | ptlrpc_request_set_replen(req); | |
70 | req->rq_no_resend = req->rq_no_delay = 1; | |
71 | } | |
72 | return req; | |
73 | } | |
74 | ||
75 | int ptlrpc_obd_ping(struct obd_device *obd) | |
76 | { | |
77 | int rc; | |
78 | struct ptlrpc_request *req; | |
d7e09d03 PT |
79 | |
80 | req = ptlrpc_prep_ping(obd->u.cli.cl_import); | |
81 | if (req == NULL) | |
0a3bdb00 | 82 | return -ENOMEM; |
d7e09d03 PT |
83 | |
84 | req->rq_send_state = LUSTRE_IMP_FULL; | |
85 | ||
86 | rc = ptlrpc_queue_wait(req); | |
87 | ||
88 | ptlrpc_req_finished(req); | |
89 | ||
0a3bdb00 | 90 | return rc; |
d7e09d03 PT |
91 | } |
92 | EXPORT_SYMBOL(ptlrpc_obd_ping); | |
93 | ||
94 | int ptlrpc_ping(struct obd_import *imp) | |
95 | { | |
96 | struct ptlrpc_request *req; | |
d7e09d03 PT |
97 | |
98 | req = ptlrpc_prep_ping(imp); | |
99 | if (req == NULL) { | |
100 | CERROR("OOM trying to ping %s->%s\n", | |
101 | imp->imp_obd->obd_uuid.uuid, | |
102 | obd2cli_tgt(imp->imp_obd)); | |
0a3bdb00 | 103 | return -ENOMEM; |
d7e09d03 PT |
104 | } |
105 | ||
106 | DEBUG_REQ(D_INFO, req, "pinging %s->%s", | |
107 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
108 | ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1); | |
109 | ||
0a3bdb00 | 110 | return 0; |
d7e09d03 PT |
111 | } |
112 | ||
113 | void ptlrpc_update_next_ping(struct obd_import *imp, int soon) | |
114 | { | |
115 | int time = soon ? PING_INTERVAL_SHORT : PING_INTERVAL; | |
116 | if (imp->imp_state == LUSTRE_IMP_DISCON) { | |
117 | int dtime = max_t(int, CONNECTION_SWITCH_MIN, | |
118 | AT_OFF ? 0 : | |
119 | at_get(&imp->imp_at.iat_net_latency)); | |
120 | time = min(time, dtime); | |
121 | } | |
122 | imp->imp_next_ping = cfs_time_shift(time); | |
123 | } | |
124 | ||
125 | void ptlrpc_ping_import_soon(struct obd_import *imp) | |
126 | { | |
127 | imp->imp_next_ping = cfs_time_current(); | |
128 | } | |
129 | ||
130 | static inline int imp_is_deactive(struct obd_import *imp) | |
131 | { | |
132 | return (imp->imp_deactive || | |
133 | OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_IMP_DEACTIVE)); | |
134 | } | |
135 | ||
136 | static inline int ptlrpc_next_reconnect(struct obd_import *imp) | |
137 | { | |
138 | if (imp->imp_server_timeout) | |
139 | return cfs_time_shift(obd_timeout / 2); | |
140 | else | |
141 | return cfs_time_shift(obd_timeout); | |
142 | } | |
143 | ||
b2d201bd | 144 | long pinger_check_timeout(unsigned long time) |
d7e09d03 PT |
145 | { |
146 | struct timeout_item *item; | |
a649ad1d | 147 | unsigned long timeout = PING_INTERVAL; |
d7e09d03 PT |
148 | |
149 | /* The timeout list is a increase order sorted list */ | |
150 | mutex_lock(&pinger_mutex); | |
151 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
152 | int ti_timeout = item->ti_timeout; | |
153 | if (timeout > ti_timeout) | |
154 | timeout = ti_timeout; | |
155 | break; | |
156 | } | |
157 | mutex_unlock(&pinger_mutex); | |
158 | ||
159 | return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)), | |
160 | cfs_time_current()); | |
161 | } | |
162 | ||
d7e09d03 PT |
163 | static bool ir_up; |
164 | ||
165 | void ptlrpc_pinger_ir_up(void) | |
166 | { | |
167 | CDEBUG(D_HA, "IR up\n"); | |
168 | ir_up = true; | |
169 | } | |
170 | EXPORT_SYMBOL(ptlrpc_pinger_ir_up); | |
171 | ||
172 | void ptlrpc_pinger_ir_down(void) | |
173 | { | |
174 | CDEBUG(D_HA, "IR down\n"); | |
175 | ir_up = false; | |
176 | } | |
177 | EXPORT_SYMBOL(ptlrpc_pinger_ir_down); | |
178 | ||
179 | static void ptlrpc_pinger_process_import(struct obd_import *imp, | |
180 | unsigned long this_ping) | |
181 | { | |
182 | int level; | |
183 | int force; | |
184 | int force_next; | |
185 | int suppress; | |
186 | ||
187 | spin_lock(&imp->imp_lock); | |
188 | ||
189 | level = imp->imp_state; | |
190 | force = imp->imp_force_verify; | |
191 | force_next = imp->imp_force_next_verify; | |
192 | /* | |
193 | * This will be used below only if the import is "FULL". | |
194 | */ | |
195 | suppress = ir_up && OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS); | |
196 | ||
197 | imp->imp_force_verify = 0; | |
198 | ||
199 | if (cfs_time_aftereq(imp->imp_next_ping - 5 * CFS_TICK, this_ping) && | |
200 | !force) { | |
201 | spin_unlock(&imp->imp_lock); | |
202 | return; | |
203 | } | |
204 | ||
205 | imp->imp_force_next_verify = 0; | |
206 | ||
207 | spin_unlock(&imp->imp_lock); | |
208 | ||
2d00bd17 | 209 | CDEBUG(level == LUSTRE_IMP_FULL ? D_INFO : D_HA, "%s->%s: level %s/%u force %u force_next %u deactive %u pingable %u suppress %u\n", |
d7e09d03 PT |
210 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), |
211 | ptlrpc_import_state_name(level), level, force, force_next, | |
212 | imp->imp_deactive, imp->imp_pingable, suppress); | |
213 | ||
214 | if (level == LUSTRE_IMP_DISCON && !imp_is_deactive(imp)) { | |
215 | /* wait for a while before trying recovery again */ | |
216 | imp->imp_next_ping = ptlrpc_next_reconnect(imp); | |
217 | if (!imp->imp_no_pinger_recover) | |
218 | ptlrpc_initiate_recovery(imp); | |
219 | } else if (level != LUSTRE_IMP_FULL || | |
220 | imp->imp_obd->obd_no_recov || | |
221 | imp_is_deactive(imp)) { | |
2d00bd17 | 222 | CDEBUG(D_HA, "%s->%s: not pinging (in recovery or recovery disabled: %s)\n", |
d7e09d03 PT |
223 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd), |
224 | ptlrpc_import_state_name(level)); | |
cca8fca1 AS |
225 | if (force) { |
226 | spin_lock(&imp->imp_lock); | |
227 | imp->imp_force_verify = 1; | |
228 | spin_unlock(&imp->imp_lock); | |
229 | } | |
d7e09d03 PT |
230 | } else if ((imp->imp_pingable && !suppress) || force_next || force) { |
231 | ptlrpc_ping(imp); | |
232 | } | |
233 | } | |
234 | ||
235 | static int ptlrpc_pinger_main(void *arg) | |
236 | { | |
237 | struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; | |
d7e09d03 PT |
238 | |
239 | /* Record that the thread is running */ | |
240 | thread_set_flags(thread, SVC_RUNNING); | |
241 | wake_up(&thread->t_ctl_waitq); | |
242 | ||
243 | /* And now, loop forever, pinging as needed. */ | |
244 | while (1) { | |
a649ad1d | 245 | unsigned long this_ping = cfs_time_current(); |
d7e09d03 | 246 | struct l_wait_info lwi; |
b2d201bd | 247 | long time_to_next_wake; |
d7e09d03 PT |
248 | struct timeout_item *item; |
249 | struct list_head *iter; | |
250 | ||
251 | mutex_lock(&pinger_mutex); | |
252 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
253 | item->ti_cb(item, item->ti_cb_data); | |
254 | } | |
255 | list_for_each(iter, &pinger_imports) { | |
256 | struct obd_import *imp = | |
257 | list_entry(iter, struct obd_import, | |
258 | imp_pinger_chain); | |
259 | ||
260 | ptlrpc_pinger_process_import(imp, this_ping); | |
261 | /* obd_timeout might have changed */ | |
262 | if (imp->imp_pingable && imp->imp_next_ping && | |
263 | cfs_time_after(imp->imp_next_ping, | |
264 | cfs_time_add(this_ping, | |
265 | cfs_time_seconds(PING_INTERVAL)))) | |
266 | ptlrpc_update_next_ping(imp, 0); | |
267 | } | |
268 | mutex_unlock(&pinger_mutex); | |
269 | /* update memory usage info */ | |
270 | obd_update_maxusage(); | |
271 | ||
272 | /* Wait until the next ping time, or until we're stopped. */ | |
273 | time_to_next_wake = pinger_check_timeout(this_ping); | |
274 | /* The ping sent by ptlrpc_send_rpc may get sent out | |
275 | say .01 second after this. | |
276 | ptlrpc_pinger_sending_on_import will then set the | |
277 | next ping time to next_ping + .01 sec, which means | |
278 | we will SKIP the next ping at next_ping, and the | |
279 | ping will get sent 2 timeouts from now! Beware. */ | |
280 | CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" (" | |
281 | CFS_TIME_T")\n", time_to_next_wake, | |
1d8cb70c GD |
282 | cfs_time_add(this_ping, |
283 | cfs_time_seconds(PING_INTERVAL))); | |
d7e09d03 | 284 | if (time_to_next_wake > 0) { |
b2d201bd | 285 | lwi = LWI_TIMEOUT(max_t(long, time_to_next_wake, |
d7e09d03 PT |
286 | cfs_time_seconds(1)), |
287 | NULL, NULL); | |
288 | l_wait_event(thread->t_ctl_waitq, | |
289 | thread_is_stopping(thread) || | |
290 | thread_is_event(thread), | |
291 | &lwi); | |
292 | if (thread_test_and_clear_flags(thread, SVC_STOPPING)) { | |
d7e09d03 PT |
293 | break; |
294 | } else { | |
295 | /* woken after adding import to reset timer */ | |
296 | thread_test_and_clear_flags(thread, SVC_EVENT); | |
297 | } | |
298 | } | |
299 | } | |
300 | ||
301 | thread_set_flags(thread, SVC_STOPPED); | |
302 | wake_up(&thread->t_ctl_waitq); | |
303 | ||
304 | CDEBUG(D_NET, "pinger thread exiting, process %d\n", current_pid()); | |
305 | return 0; | |
306 | } | |
307 | ||
20802057 | 308 | static struct ptlrpc_thread pinger_thread; |
d7e09d03 PT |
309 | |
310 | int ptlrpc_start_pinger(void) | |
311 | { | |
312 | struct l_wait_info lwi = { 0 }; | |
313 | int rc; | |
d7e09d03 | 314 | |
20802057 DE |
315 | if (!thread_is_init(&pinger_thread) && |
316 | !thread_is_stopped(&pinger_thread)) | |
0a3bdb00 | 317 | return -EALREADY; |
d7e09d03 | 318 | |
20802057 | 319 | init_waitqueue_head(&pinger_thread.t_ctl_waitq); |
d7e09d03 | 320 | |
20802057 | 321 | strcpy(pinger_thread.t_name, "ll_ping"); |
d7e09d03 PT |
322 | |
323 | /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we | |
324 | * just drop the VM and FILES in cfs_daemonize_ctxt() right away. */ | |
9edf0f67 KC |
325 | rc = PTR_ERR(kthread_run(ptlrpc_pinger_main, &pinger_thread, |
326 | "%s", pinger_thread.t_name)); | |
d7e09d03 PT |
327 | if (IS_ERR_VALUE(rc)) { |
328 | CERROR("cannot start thread: %d\n", rc); | |
0a3bdb00 | 329 | return rc; |
d7e09d03 | 330 | } |
20802057 DE |
331 | l_wait_event(pinger_thread.t_ctl_waitq, |
332 | thread_is_running(&pinger_thread), &lwi); | |
d7e09d03 PT |
333 | |
334 | if (suppress_pings) | |
2d00bd17 | 335 | CWARN("Pings will be suppressed at the request of the administrator. The configuration shall meet the additional requirements described in the manual. (Search for the \"suppress_pings\" kernel module parameter.)\n"); |
d7e09d03 | 336 | |
0a3bdb00 | 337 | return 0; |
d7e09d03 PT |
338 | } |
339 | ||
340 | int ptlrpc_pinger_remove_timeouts(void); | |
341 | ||
342 | int ptlrpc_stop_pinger(void) | |
343 | { | |
344 | struct l_wait_info lwi = { 0 }; | |
345 | int rc = 0; | |
d7e09d03 | 346 | |
b39f15c9 PT |
347 | if (thread_is_init(&pinger_thread) || |
348 | thread_is_stopped(&pinger_thread)) | |
0a3bdb00 | 349 | return -EALREADY; |
d7e09d03 PT |
350 | |
351 | ptlrpc_pinger_remove_timeouts(); | |
20802057 DE |
352 | thread_set_flags(&pinger_thread, SVC_STOPPING); |
353 | wake_up(&pinger_thread.t_ctl_waitq); | |
d7e09d03 | 354 | |
20802057 DE |
355 | l_wait_event(pinger_thread.t_ctl_waitq, |
356 | thread_is_stopped(&pinger_thread), &lwi); | |
d7e09d03 | 357 | |
0a3bdb00 | 358 | return rc; |
d7e09d03 PT |
359 | } |
360 | ||
/**
 * Reschedule the next ping of \a imp using the regular (not the short)
 * interval; see ptlrpc_update_next_ping().
 */
void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
{
	const int soon = 0;

	ptlrpc_update_next_ping(imp, soon);
}
365 | EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import); | |
366 | ||
367 | void ptlrpc_pinger_commit_expected(struct obd_import *imp) | |
368 | { | |
369 | ptlrpc_update_next_ping(imp, 1); | |
5e42bc9d | 370 | assert_spin_locked(&imp->imp_lock); |
d7e09d03 PT |
371 | /* |
372 | * Avoid reading stale imp_connect_data. When not sure if pings are | |
373 | * expected or not on next connection, we assume they are not and force | |
374 | * one anyway to guarantee the chance of updating | |
375 | * imp_peer_committed_transno. | |
376 | */ | |
377 | if (imp->imp_state != LUSTRE_IMP_FULL || | |
378 | OCD_HAS_FLAG(&imp->imp_connect_data, PINGLESS)) | |
379 | imp->imp_force_next_verify = 1; | |
380 | } | |
381 | ||
382 | int ptlrpc_pinger_add_import(struct obd_import *imp) | |
383 | { | |
d7e09d03 | 384 | if (!list_empty(&imp->imp_pinger_chain)) |
0a3bdb00 | 385 | return -EALREADY; |
d7e09d03 PT |
386 | |
387 | mutex_lock(&pinger_mutex); | |
388 | CDEBUG(D_HA, "adding pingable import %s->%s\n", | |
389 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
390 | /* if we add to pinger we want recovery on this import */ | |
391 | imp->imp_obd->obd_no_recov = 0; | |
392 | ptlrpc_update_next_ping(imp, 0); | |
393 | /* XXX sort, blah blah */ | |
394 | list_add_tail(&imp->imp_pinger_chain, &pinger_imports); | |
395 | class_import_get(imp); | |
396 | ||
397 | ptlrpc_pinger_wake_up(); | |
398 | mutex_unlock(&pinger_mutex); | |
399 | ||
0a3bdb00 | 400 | return 0; |
d7e09d03 PT |
401 | } |
402 | EXPORT_SYMBOL(ptlrpc_pinger_add_import); | |
403 | ||
404 | int ptlrpc_pinger_del_import(struct obd_import *imp) | |
405 | { | |
d7e09d03 | 406 | if (list_empty(&imp->imp_pinger_chain)) |
0a3bdb00 | 407 | return -ENOENT; |
d7e09d03 PT |
408 | |
409 | mutex_lock(&pinger_mutex); | |
410 | list_del_init(&imp->imp_pinger_chain); | |
411 | CDEBUG(D_HA, "removing pingable import %s->%s\n", | |
412 | imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); | |
413 | /* if we remove from pinger we don't want recovery on this import */ | |
414 | imp->imp_obd->obd_no_recov = 1; | |
415 | class_import_put(imp); | |
416 | mutex_unlock(&pinger_mutex); | |
0a3bdb00 | 417 | return 0; |
d7e09d03 PT |
418 | } |
419 | EXPORT_SYMBOL(ptlrpc_pinger_del_import); | |
420 | ||
421 | /** | |
422 | * Register a timeout callback to the pinger list, and the callback will | |
423 | * be called when timeout happens. | |
424 | */ | |
9c234f6c | 425 | struct timeout_item *ptlrpc_new_timeout(int time, enum timeout_event event, |
d7e09d03 PT |
426 | timeout_cb_t cb, void *data) |
427 | { | |
428 | struct timeout_item *ti; | |
429 | ||
9ae10597 | 430 | ti = kzalloc(sizeof(*ti), GFP_NOFS); |
d7e09d03 | 431 | if (!ti) |
fbe7c6c7 | 432 | return NULL; |
d7e09d03 PT |
433 | |
434 | INIT_LIST_HEAD(&ti->ti_obd_list); | |
435 | INIT_LIST_HEAD(&ti->ti_chain); | |
436 | ti->ti_timeout = time; | |
437 | ti->ti_event = event; | |
438 | ti->ti_cb = cb; | |
439 | ti->ti_cb_data = data; | |
440 | ||
441 | return ti; | |
442 | } | |
443 | ||
444 | /** | |
fa4d19c4 | 445 | * Register timeout event on the pinger thread. |
d7e09d03 PT |
446 | * Note: the timeout list is an sorted list with increased timeout value. |
447 | */ | |
448 | static struct timeout_item* | |
449 | ptlrpc_pinger_register_timeout(int time, enum timeout_event event, | |
450 | timeout_cb_t cb, void *data) | |
451 | { | |
452 | struct timeout_item *item, *tmp; | |
453 | ||
454 | LASSERT(mutex_is_locked(&pinger_mutex)); | |
455 | ||
456 | list_for_each_entry(item, &timeout_list, ti_chain) | |
457 | if (item->ti_event == event) | |
458 | goto out; | |
459 | ||
460 | item = ptlrpc_new_timeout(time, event, cb, data); | |
461 | if (item) { | |
462 | list_for_each_entry_reverse(tmp, &timeout_list, ti_chain) { | |
463 | if (tmp->ti_timeout < time) { | |
464 | list_add(&item->ti_chain, &tmp->ti_chain); | |
465 | goto out; | |
466 | } | |
467 | } | |
468 | list_add(&item->ti_chain, &timeout_list); | |
469 | } | |
470 | out: | |
471 | return item; | |
472 | } | |
473 | ||
474 | /* Add a client_obd to the timeout event list, when timeout(@time) | |
475 | * happens, the callback(@cb) will be called. | |
476 | */ | |
477 | int ptlrpc_add_timeout_client(int time, enum timeout_event event, | |
478 | timeout_cb_t cb, void *data, | |
479 | struct list_head *obd_list) | |
480 | { | |
481 | struct timeout_item *ti; | |
482 | ||
483 | mutex_lock(&pinger_mutex); | |
484 | ti = ptlrpc_pinger_register_timeout(time, event, cb, data); | |
485 | if (!ti) { | |
486 | mutex_unlock(&pinger_mutex); | |
fbe7c6c7 | 487 | return -EINVAL; |
d7e09d03 PT |
488 | } |
489 | list_add(obd_list, &ti->ti_obd_list); | |
490 | mutex_unlock(&pinger_mutex); | |
491 | return 0; | |
492 | } | |
493 | EXPORT_SYMBOL(ptlrpc_add_timeout_client); | |
494 | ||
495 | int ptlrpc_del_timeout_client(struct list_head *obd_list, | |
496 | enum timeout_event event) | |
497 | { | |
498 | struct timeout_item *ti = NULL, *item; | |
499 | ||
500 | if (list_empty(obd_list)) | |
501 | return 0; | |
502 | mutex_lock(&pinger_mutex); | |
503 | list_del_init(obd_list); | |
504 | /** | |
505 | * If there are no obd attached to the timeout event | |
506 | * list, remove this timeout event from the pinger | |
507 | */ | |
508 | list_for_each_entry(item, &timeout_list, ti_chain) { | |
509 | if (item->ti_event == event) { | |
510 | ti = item; | |
511 | break; | |
512 | } | |
513 | } | |
998d2766 | 514 | LASSERTF(ti != NULL, "ti is NULL !\n"); |
d7e09d03 PT |
515 | if (list_empty(&ti->ti_obd_list)) { |
516 | list_del(&ti->ti_chain); | |
9ae10597 | 517 | kfree(ti); |
d7e09d03 PT |
518 | } |
519 | mutex_unlock(&pinger_mutex); | |
520 | return 0; | |
521 | } | |
522 | EXPORT_SYMBOL(ptlrpc_del_timeout_client); | |
523 | ||
524 | int ptlrpc_pinger_remove_timeouts(void) | |
525 | { | |
526 | struct timeout_item *item, *tmp; | |
527 | ||
528 | mutex_lock(&pinger_mutex); | |
529 | list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) { | |
530 | LASSERT(list_empty(&item->ti_obd_list)); | |
531 | list_del(&item->ti_chain); | |
9ae10597 | 532 | kfree(item); |
d7e09d03 PT |
533 | } |
534 | mutex_unlock(&pinger_mutex); | |
535 | return 0; | |
536 | } | |
537 | ||
7d46a21a | 538 | void ptlrpc_pinger_wake_up(void) |
d7e09d03 | 539 | { |
20802057 DE |
540 | thread_add_flags(&pinger_thread, SVC_EVENT); |
541 | wake_up(&pinger_thread.t_ctl_waitq); | |
d7e09d03 PT |
542 | } |
543 | ||
544 | /* Ping evictor thread */ | |
545 | #define PET_READY 1 | |
546 | #define PET_TERMINATE 2 | |
547 | ||
225f597c | 548 | static int pet_refcount; |
d7e09d03 PT |
549 | static int pet_state; |
550 | static wait_queue_head_t pet_waitq; | |
551 | LIST_HEAD(pet_list); | |
552 | static DEFINE_SPINLOCK(pet_lock); | |
553 | ||
554 | int ping_evictor_wake(struct obd_export *exp) | |
555 | { | |
556 | struct obd_device *obd; | |
557 | ||
558 | spin_lock(&pet_lock); | |
559 | if (pet_state != PET_READY) { | |
560 | /* eventually the new obd will call here again. */ | |
561 | spin_unlock(&pet_lock); | |
562 | return 1; | |
563 | } | |
564 | ||
565 | obd = class_exp2obd(exp); | |
566 | if (list_empty(&obd->obd_evict_list)) { | |
567 | class_incref(obd, "evictor", obd); | |
568 | list_add(&obd->obd_evict_list, &pet_list); | |
569 | } | |
570 | spin_unlock(&pet_lock); | |
571 | ||
572 | wake_up(&pet_waitq); | |
573 | return 0; | |
574 | } | |
575 | ||
576 | static int ping_evictor_main(void *arg) | |
577 | { | |
578 | struct obd_device *obd; | |
579 | struct obd_export *exp; | |
580 | struct l_wait_info lwi = { 0 }; | |
581 | time_t expire_time; | |
d7e09d03 PT |
582 | |
583 | unshare_fs_struct(); | |
584 | ||
585 | CDEBUG(D_HA, "Starting Ping Evictor\n"); | |
586 | pet_state = PET_READY; | |
587 | while (1) { | |
588 | l_wait_event(pet_waitq, (!list_empty(&pet_list)) || | |
589 | (pet_state == PET_TERMINATE), &lwi); | |
590 | ||
591 | /* loop until all obd's will be removed */ | |
592 | if ((pet_state == PET_TERMINATE) && list_empty(&pet_list)) | |
593 | break; | |
594 | ||
595 | /* we only get here if pet_exp != NULL, and the end of this | |
596 | * loop is the only place which sets it NULL again, so lock | |
597 | * is not strictly necessary. */ | |
598 | spin_lock(&pet_lock); | |
599 | obd = list_entry(pet_list.next, struct obd_device, | |
600 | obd_evict_list); | |
601 | spin_unlock(&pet_lock); | |
602 | ||
7264b8a5 | 603 | expire_time = get_seconds() - PING_EVICT_TIMEOUT; |
d7e09d03 PT |
604 | |
605 | CDEBUG(D_HA, "evicting all exports of obd %s older than %ld\n", | |
606 | obd->obd_name, expire_time); | |
607 | ||
608 | /* Exports can't be deleted out of the list while we hold | |
609 | * the obd lock (class_unlink_export), which means we can't | |
610 | * lose the last ref on the export. If they've already been | |
611 | * removed from the list, we won't find them here. */ | |
612 | spin_lock(&obd->obd_dev_lock); | |
613 | while (!list_empty(&obd->obd_exports_timed)) { | |
614 | exp = list_entry(obd->obd_exports_timed.next, | |
615 | struct obd_export, | |
616 | exp_obd_chain_timed); | |
617 | if (expire_time > exp->exp_last_request_time) { | |
618 | class_export_get(exp); | |
619 | spin_unlock(&obd->obd_dev_lock); | |
2d00bd17 | 620 | LCONSOLE_WARN("%s: haven't heard from client %s (at %s) in %ld seconds. I think it's dead, and I am evicting it. exp %p, cur %ld expire %ld last %ld\n", |
d7e09d03 PT |
621 | obd->obd_name, |
622 | obd_uuid2str(&exp->exp_client_uuid), | |
623 | obd_export_nid2str(exp), | |
7264b8a5 | 624 | (long)(get_seconds() - |
d7e09d03 | 625 | exp->exp_last_request_time), |
7264b8a5 | 626 | exp, (long)get_seconds(), |
d7e09d03 PT |
627 | (long)expire_time, |
628 | (long)exp->exp_last_request_time); | |
629 | CDEBUG(D_HA, "Last request was at %ld\n", | |
630 | exp->exp_last_request_time); | |
631 | class_fail_export(exp); | |
632 | class_export_put(exp); | |
633 | spin_lock(&obd->obd_dev_lock); | |
634 | } else { | |
635 | /* List is sorted, so everyone below is ok */ | |
636 | break; | |
637 | } | |
638 | } | |
639 | spin_unlock(&obd->obd_dev_lock); | |
640 | ||
641 | spin_lock(&pet_lock); | |
642 | list_del_init(&obd->obd_evict_list); | |
643 | spin_unlock(&pet_lock); | |
644 | ||
645 | class_decref(obd, "evictor", obd); | |
646 | } | |
647 | CDEBUG(D_HA, "Exiting Ping Evictor\n"); | |
648 | ||
0a3bdb00 | 649 | return 0; |
d7e09d03 PT |
650 | } |
651 | ||
652 | void ping_evictor_start(void) | |
653 | { | |
68b636b6 | 654 | struct task_struct *task; |
d7e09d03 PT |
655 | |
656 | if (++pet_refcount > 1) | |
657 | return; | |
658 | ||
659 | init_waitqueue_head(&pet_waitq); | |
660 | ||
661 | task = kthread_run(ping_evictor_main, NULL, "ll_evictor"); | |
662 | if (IS_ERR(task)) { | |
663 | pet_refcount--; | |
664 | CERROR("Cannot start ping evictor thread: %ld\n", | |
665 | PTR_ERR(task)); | |
666 | } | |
667 | } | |
668 | EXPORT_SYMBOL(ping_evictor_start); | |
669 | ||
670 | void ping_evictor_stop(void) | |
671 | { | |
672 | if (--pet_refcount > 0) | |
673 | return; | |
674 | ||
675 | pet_state = PET_TERMINATE; | |
676 | wake_up(&pet_waitq); | |
677 | } | |
678 | EXPORT_SYMBOL(ping_evictor_stop); |