hugetlbfs: fix races and page leaks during migration
[linux-2.6-block.git] / fs / autofs / expire.c
CommitLineData
ebc921ca
IK
/*
 * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
 * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
 * Copyright 2001-2006 Ian Kent <raven@themaw.net>
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 */
10
11#include "autofs_i.h"
12
ebc921ca
IK
13/* Check if a dentry can be expired */
14static inline int autofs_can_expire(struct dentry *dentry,
e5c85e1f 15 unsigned long timeout, unsigned int how)
ebc921ca
IK
16{
17 struct autofs_info *ino = autofs_dentry_ino(dentry);
18
19 /* dentry in the process of being deleted */
20 if (ino == NULL)
21 return 0;
22
e5c85e1f 23 if (!(how & AUTOFS_EXP_IMMEDIATE)) {
ebc921ca 24 /* Too young to die */
2fd9944f 25 if (!timeout || time_after(ino->last_used + timeout, jiffies))
ebc921ca
IK
26 return 0;
27 }
28 return 1;
29}
30
31/* Check a mount point for busyness */
cbf6898f
IK
32static int autofs_mount_busy(struct vfsmount *mnt,
33 struct dentry *dentry, unsigned int how)
ebc921ca
IK
34{
35 struct dentry *top = dentry;
36 struct path path = {.mnt = mnt, .dentry = dentry};
37 int status = 1;
38
39 pr_debug("dentry %p %pd\n", dentry, dentry);
40
41 path_get(&path);
42
43 if (!follow_down_one(&path))
44 goto done;
45
46 if (is_autofs_dentry(path.dentry)) {
47 struct autofs_sb_info *sbi = autofs_sbi(path.dentry->d_sb);
48
49 /* This is an autofs submount, we can't expire it */
50 if (autofs_type_indirect(sbi->type))
51 goto done;
52 }
53
cbf6898f
IK
54 /* Not a submount, has a forced expire been requested */
55 if (how & AUTOFS_EXP_FORCED) {
56 status = 0;
57 goto done;
58 }
59
ebc921ca
IK
60 /* Update the expiry counter if fs is busy */
61 if (!may_umount_tree(path.mnt)) {
62 struct autofs_info *ino;
63
64 ino = autofs_dentry_ino(top);
65 ino->last_used = jiffies;
66 goto done;
67 }
68
69 status = 0;
70done:
71 pr_debug("returning = %d\n", status);
72 path_put(&path);
73 return status;
74}
75
76/*
77 * Calculate and dget next entry in the subdirs list under root.
78 */
79static struct dentry *get_next_positive_subdir(struct dentry *prev,
80 struct dentry *root)
81{
82 struct autofs_sb_info *sbi = autofs_sbi(root->d_sb);
83 struct list_head *next;
84 struct dentry *q;
85
86 spin_lock(&sbi->lookup_lock);
87 spin_lock(&root->d_lock);
88
89 if (prev)
90 next = prev->d_child.next;
91 else {
92 prev = dget_dlock(root);
93 next = prev->d_subdirs.next;
94 }
95
96cont:
97 if (next == &root->d_subdirs) {
98 spin_unlock(&root->d_lock);
99 spin_unlock(&sbi->lookup_lock);
100 dput(prev);
101 return NULL;
102 }
103
104 q = list_entry(next, struct dentry, d_child);
105
106 spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
107 /* Already gone or negative dentry (under construction) - try next */
108 if (!d_count(q) || !simple_positive(q)) {
109 spin_unlock(&q->d_lock);
110 next = q->d_child.next;
111 goto cont;
112 }
113 dget_dlock(q);
114 spin_unlock(&q->d_lock);
115 spin_unlock(&root->d_lock);
116 spin_unlock(&sbi->lookup_lock);
117
118 dput(prev);
119
120 return q;
121}
122
123/*
124 * Calculate and dget next entry in top down tree traversal.
125 */
126static struct dentry *get_next_positive_dentry(struct dentry *prev,
127 struct dentry *root)
128{
129 struct autofs_sb_info *sbi = autofs_sbi(root->d_sb);
130 struct list_head *next;
131 struct dentry *p, *ret;
132
133 if (prev == NULL)
134 return dget(root);
135
136 spin_lock(&sbi->lookup_lock);
137relock:
138 p = prev;
139 spin_lock(&p->d_lock);
140again:
141 next = p->d_subdirs.next;
142 if (next == &p->d_subdirs) {
143 while (1) {
144 struct dentry *parent;
145
146 if (p == root) {
147 spin_unlock(&p->d_lock);
148 spin_unlock(&sbi->lookup_lock);
149 dput(prev);
150 return NULL;
151 }
152
153 parent = p->d_parent;
154 if (!spin_trylock(&parent->d_lock)) {
155 spin_unlock(&p->d_lock);
156 cpu_relax();
157 goto relock;
158 }
159 spin_unlock(&p->d_lock);
160 next = p->d_child.next;
161 p = parent;
162 if (next != &parent->d_subdirs)
163 break;
164 }
165 }
166 ret = list_entry(next, struct dentry, d_child);
167
168 spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
169 /* Negative dentry - try next */
170 if (!simple_positive(ret)) {
171 spin_unlock(&p->d_lock);
172 lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
173 p = ret;
174 goto again;
175 }
176 dget_dlock(ret);
177 spin_unlock(&ret->d_lock);
178 spin_unlock(&p->d_lock);
179 spin_unlock(&sbi->lookup_lock);
180
181 dput(prev);
182
183 return ret;
184}
185
186/*
187 * Check a direct mount point for busyness.
188 * Direct mounts have similar expiry semantics to tree mounts.
189 * The tree is not busy iff no mountpoints are busy and there are no
190 * autofs submounts.
191 */
192static int autofs_direct_busy(struct vfsmount *mnt,
193 struct dentry *top,
194 unsigned long timeout,
e5c85e1f 195 unsigned int how)
ebc921ca
IK
196{
197 pr_debug("top %p %pd\n", top, top);
198
cbf6898f
IK
199 /* Forced expire, user space handles busy mounts */
200 if (how & AUTOFS_EXP_FORCED)
201 return 0;
202
ebc921ca
IK
203 /* If it's busy update the expiry counters */
204 if (!may_umount_tree(mnt)) {
205 struct autofs_info *ino;
206
207 ino = autofs_dentry_ino(top);
208 if (ino)
209 ino->last_used = jiffies;
210 return 1;
211 }
212
213 /* Timeout of a direct mount is determined by its top dentry */
e5c85e1f 214 if (!autofs_can_expire(top, timeout, how))
ebc921ca
IK
215 return 1;
216
217 return 0;
218}
219
220/*
221 * Check a directory tree of mount points for busyness
222 * The tree is not busy iff no mountpoints are busy
223 */
224static int autofs_tree_busy(struct vfsmount *mnt,
225 struct dentry *top,
226 unsigned long timeout,
e5c85e1f 227 unsigned int how)
ebc921ca
IK
228{
229 struct autofs_info *top_ino = autofs_dentry_ino(top);
230 struct dentry *p;
231
232 pr_debug("top %p %pd\n", top, top);
233
234 /* Negative dentry - give up */
235 if (!simple_positive(top))
236 return 1;
237
238 p = NULL;
239 while ((p = get_next_positive_dentry(p, top))) {
240 pr_debug("dentry %p %pd\n", p, p);
241
242 /*
243 * Is someone visiting anywhere in the subtree ?
244 * If there's no mount we need to check the usage
245 * count for the autofs dentry.
246 * If the fs is busy update the expiry counter.
247 */
248 if (d_mountpoint(p)) {
cbf6898f 249 if (autofs_mount_busy(mnt, p, how)) {
ebc921ca
IK
250 top_ino->last_used = jiffies;
251 dput(p);
252 return 1;
253 }
254 } else {
255 struct autofs_info *ino = autofs_dentry_ino(p);
256 unsigned int ino_count = atomic_read(&ino->count);
257
258 /* allow for dget above and top is already dgot */
259 if (p == top)
260 ino_count += 2;
261 else
262 ino_count++;
263
264 if (d_count(p) > ino_count) {
265 top_ino->last_used = jiffies;
266 dput(p);
267 return 1;
268 }
269 }
270 }
271
cbf6898f
IK
272 /* Forced expire, user space handles busy mounts */
273 if (how & AUTOFS_EXP_FORCED)
274 return 0;
275
ebc921ca 276 /* Timeout of a tree mount is ultimately determined by its top dentry */
e5c85e1f 277 if (!autofs_can_expire(top, timeout, how))
ebc921ca
IK
278 return 1;
279
280 return 0;
281}
282
283static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
284 struct dentry *parent,
285 unsigned long timeout,
e5c85e1f 286 unsigned int how)
ebc921ca
IK
287{
288 struct dentry *p;
289
290 pr_debug("parent %p %pd\n", parent, parent);
291
292 p = NULL;
293 while ((p = get_next_positive_dentry(p, parent))) {
294 pr_debug("dentry %p %pd\n", p, p);
295
296 if (d_mountpoint(p)) {
297 /* Can we umount this guy */
cbf6898f 298 if (autofs_mount_busy(mnt, p, how))
ebc921ca
IK
299 continue;
300
cbf6898f
IK
301 /* This isn't a submount so if a forced expire
302 * has been requested, user space handles busy
303 * mounts */
304 if (how & AUTOFS_EXP_FORCED)
305 return p;
306
ebc921ca 307 /* Can we expire this guy */
e5c85e1f 308 if (autofs_can_expire(p, timeout, how))
ebc921ca
IK
309 return p;
310 }
311 }
312 return NULL;
313}
314
315/* Check if we can expire a direct mount (possibly a tree) */
5d30517d
IK
316static struct dentry *autofs_expire_direct(struct super_block *sb,
317 struct vfsmount *mnt,
318 struct autofs_sb_info *sbi,
e5c85e1f 319 unsigned int how)
ebc921ca 320{
ebc921ca 321 struct dentry *root = dget(sb->s_root);
ebc921ca 322 struct autofs_info *ino;
e5c85e1f 323 unsigned long timeout;
ebc921ca
IK
324
325 if (!root)
326 return NULL;
327
ebc921ca
IK
328 timeout = sbi->exp_timeout;
329
e5c85e1f 330 if (!autofs_direct_busy(mnt, root, timeout, how)) {
ebc921ca
IK
331 spin_lock(&sbi->fs_lock);
332 ino = autofs_dentry_ino(root);
333 /* No point expiring a pending mount */
334 if (ino->flags & AUTOFS_INF_PENDING) {
335 spin_unlock(&sbi->fs_lock);
336 goto out;
337 }
338 ino->flags |= AUTOFS_INF_WANT_EXPIRE;
339 spin_unlock(&sbi->fs_lock);
340 synchronize_rcu();
e5c85e1f 341 if (!autofs_direct_busy(mnt, root, timeout, how)) {
ebc921ca
IK
342 spin_lock(&sbi->fs_lock);
343 ino->flags |= AUTOFS_INF_EXPIRING;
344 init_completion(&ino->expire_complete);
345 spin_unlock(&sbi->fs_lock);
346 return root;
347 }
348 spin_lock(&sbi->fs_lock);
349 ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
350 spin_unlock(&sbi->fs_lock);
351 }
352out:
353 dput(root);
354
355 return NULL;
356}
357
358/* Check if 'dentry' should expire, or return a nearby
359 * dentry that is suitable.
360 * If returned dentry is different from arg dentry,
361 * then a dget() reference was taken, else not.
362 */
363static struct dentry *should_expire(struct dentry *dentry,
364 struct vfsmount *mnt,
365 unsigned long timeout,
e5c85e1f 366 unsigned int how)
ebc921ca 367{
ebc921ca
IK
368 struct autofs_info *ino = autofs_dentry_ino(dentry);
369 unsigned int ino_count;
370
371 /* No point expiring a pending mount */
372 if (ino->flags & AUTOFS_INF_PENDING)
373 return NULL;
374
375 /*
376 * Case 1: (i) indirect mount or top level pseudo direct mount
377 * (autofs-4.1).
378 * (ii) indirect mount with offset mount, check the "/"
379 * offset (autofs-5.0+).
380 */
381 if (d_mountpoint(dentry)) {
382 pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
383
384 /* Can we umount this guy */
cbf6898f 385 if (autofs_mount_busy(mnt, dentry, how))
ebc921ca
IK
386 return NULL;
387
cbf6898f
IK
388 /* This isn't a submount so if a forced expire
389 * has been requested, user space handles busy
390 * mounts */
391 if (how & AUTOFS_EXP_FORCED)
392 return dentry;
393
ebc921ca 394 /* Can we expire this guy */
e5c85e1f 395 if (autofs_can_expire(dentry, timeout, how))
ebc921ca
IK
396 return dentry;
397 return NULL;
398 }
399
400 if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
401 pr_debug("checking symlink %p %pd\n", dentry, dentry);
cbf6898f
IK
402
403 /* Forced expire, user space handles busy mounts */
404 if (how & AUTOFS_EXP_FORCED)
405 return dentry;
406
ebc921ca
IK
407 /*
408 * A symlink can't be "busy" in the usual sense so
409 * just check last used for expire timeout.
410 */
e5c85e1f 411 if (autofs_can_expire(dentry, timeout, how))
ebc921ca
IK
412 return dentry;
413 return NULL;
414 }
415
416 if (simple_empty(dentry))
417 return NULL;
418
419 /* Case 2: tree mount, expire iff entire tree is not busy */
e5c85e1f 420 if (!(how & AUTOFS_EXP_LEAVES)) {
cbf6898f
IK
421 /* Not a forced expire? */
422 if (!(how & AUTOFS_EXP_FORCED)) {
423 /* ref-walk currently on this dentry? */
424 ino_count = atomic_read(&ino->count) + 1;
425 if (d_count(dentry) > ino_count)
426 return NULL;
427 }
ebc921ca 428
e5c85e1f 429 if (!autofs_tree_busy(mnt, dentry, timeout, how))
ebc921ca
IK
430 return dentry;
431 /*
432 * Case 3: pseudo direct mount, expire individual leaves
433 * (autofs-4.1).
434 */
435 } else {
ebc921ca
IK
436 struct dentry *expired;
437
cbf6898f
IK
438 /* Not a forced expire? */
439 if (!(how & AUTOFS_EXP_FORCED)) {
440 /* ref-walk currently on this dentry? */
441 ino_count = atomic_read(&ino->count) + 1;
442 if (d_count(dentry) > ino_count)
443 return NULL;
444 }
ebc921ca 445
e5c85e1f 446 expired = autofs_check_leaves(mnt, dentry, timeout, how);
ebc921ca
IK
447 if (expired) {
448 if (expired == dentry)
449 dput(dentry);
450 return expired;
451 }
452 }
453 return NULL;
454}
455
456/*
457 * Find an eligible tree to time-out
458 * A tree is eligible if :-
459 * - it is unused by any user process
460 * - it has been unused for exp_timeout time
461 */
571bc35c
IK
462static struct dentry *autofs_expire_indirect(struct super_block *sb,
463 struct vfsmount *mnt,
464 struct autofs_sb_info *sbi,
e5c85e1f 465 unsigned int how)
ebc921ca
IK
466{
467 unsigned long timeout;
468 struct dentry *root = sb->s_root;
469 struct dentry *dentry;
470 struct dentry *expired;
471 struct dentry *found;
472 struct autofs_info *ino;
473
474 if (!root)
475 return NULL;
476
ebc921ca
IK
477 timeout = sbi->exp_timeout;
478
479 dentry = NULL;
480 while ((dentry = get_next_positive_subdir(dentry, root))) {
ebc921ca
IK
481 spin_lock(&sbi->fs_lock);
482 ino = autofs_dentry_ino(dentry);
483 if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
484 spin_unlock(&sbi->fs_lock);
485 continue;
486 }
487 spin_unlock(&sbi->fs_lock);
488
e5c85e1f 489 expired = should_expire(dentry, mnt, timeout, how);
ebc921ca
IK
490 if (!expired)
491 continue;
492
493 spin_lock(&sbi->fs_lock);
494 ino = autofs_dentry_ino(expired);
495 ino->flags |= AUTOFS_INF_WANT_EXPIRE;
496 spin_unlock(&sbi->fs_lock);
497 synchronize_rcu();
498
499 /* Make sure a reference is not taken on found if
500 * things have changed.
501 */
e5c85e1f
IK
502 how &= ~AUTOFS_EXP_LEAVES;
503 found = should_expire(expired, mnt, timeout, how);
ebc921ca
IK
504 if (!found || found != expired)
505 /* Something has changed, continue */
506 goto next;
507
508 if (expired != dentry)
509 dput(dentry);
510
511 spin_lock(&sbi->fs_lock);
512 goto found;
513next:
514 spin_lock(&sbi->fs_lock);
515 ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
516 spin_unlock(&sbi->fs_lock);
517 if (expired != dentry)
518 dput(expired);
519 }
520 return NULL;
521
522found:
523 pr_debug("returning %p %pd\n", expired, expired);
524 ino->flags |= AUTOFS_INF_EXPIRING;
525 init_completion(&ino->expire_complete);
526 spin_unlock(&sbi->fs_lock);
527 return expired;
528}
529
530int autofs_expire_wait(const struct path *path, int rcu_walk)
531{
532 struct dentry *dentry = path->dentry;
533 struct autofs_sb_info *sbi = autofs_sbi(dentry->d_sb);
534 struct autofs_info *ino = autofs_dentry_ino(dentry);
535 int status;
536 int state;
537
538 /* Block on any pending expire */
539 if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
540 return 0;
541 if (rcu_walk)
542 return -ECHILD;
543
544retry:
545 spin_lock(&sbi->fs_lock);
546 state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
547 if (state == AUTOFS_INF_WANT_EXPIRE) {
548 spin_unlock(&sbi->fs_lock);
549 /*
550 * Possibly being selected for expire, wait until
551 * it's selected or not.
552 */
553 schedule_timeout_uninterruptible(HZ/10);
554 goto retry;
555 }
556 if (state & AUTOFS_INF_EXPIRING) {
557 spin_unlock(&sbi->fs_lock);
558
559 pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
560
561 status = autofs_wait(sbi, path, NFY_NONE);
562 wait_for_completion(&ino->expire_complete);
563
564 pr_debug("expire done status=%d\n", status);
565
566 if (d_unhashed(dentry))
567 return -EAGAIN;
568
569 return status;
570 }
571 spin_unlock(&sbi->fs_lock);
572
573 return 0;
574}
575
576/* Perform an expiry operation */
577int autofs_expire_run(struct super_block *sb,
578 struct vfsmount *mnt,
579 struct autofs_sb_info *sbi,
580 struct autofs_packet_expire __user *pkt_p)
581{
582 struct autofs_packet_expire pkt;
583 struct autofs_info *ino;
584 struct dentry *dentry;
585 int ret = 0;
586
587 memset(&pkt, 0, sizeof(pkt));
588
589 pkt.hdr.proto_version = sbi->version;
590 pkt.hdr.type = autofs_ptype_expire;
591
592 dentry = autofs_expire_indirect(sb, mnt, sbi, 0);
593 if (!dentry)
594 return -EAGAIN;
595
596 pkt.len = dentry->d_name.len;
597 memcpy(pkt.name, dentry->d_name.name, pkt.len);
598 pkt.name[pkt.len] = '\0';
ebc921ca
IK
599
600 if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
601 ret = -EFAULT;
602
603 spin_lock(&sbi->fs_lock);
604 ino = autofs_dentry_ino(dentry);
605 /* avoid rapid-fire expire attempts if expiry fails */
2fd9944f 606 ino->last_used = jiffies;
ebc921ca
IK
607 ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
608 complete_all(&ino->expire_complete);
609 spin_unlock(&sbi->fs_lock);
610
63ce5f55
PB
611 dput(dentry);
612
ebc921ca
IK
613 return ret;
614}
615
616int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
e5c85e1f 617 struct autofs_sb_info *sbi, unsigned int how)
ebc921ca
IK
618{
619 struct dentry *dentry;
620 int ret = -EAGAIN;
621
622 if (autofs_type_trigger(sbi->type))
e5c85e1f 623 dentry = autofs_expire_direct(sb, mnt, sbi, how);
ebc921ca 624 else
e5c85e1f 625 dentry = autofs_expire_indirect(sb, mnt, sbi, how);
ebc921ca
IK
626
627 if (dentry) {
628 struct autofs_info *ino = autofs_dentry_ino(dentry);
629 const struct path path = { .mnt = mnt, .dentry = dentry };
630
631 /* This is synchronous because it makes the daemon a
632 * little easier
633 */
634 ret = autofs_wait(sbi, &path, NFY_EXPIRE);
635
636 spin_lock(&sbi->fs_lock);
637 /* avoid rapid-fire expire attempts if expiry fails */
2fd9944f 638 ino->last_used = jiffies;
ebc921ca
IK
639 ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
640 complete_all(&ino->expire_complete);
641 spin_unlock(&sbi->fs_lock);
642 dput(dentry);
643 }
644
645 return ret;
646}
647
648/*
649 * Call repeatedly until it returns -EAGAIN, meaning there's nothing
650 * more to be done.
651 */
652int autofs_expire_multi(struct super_block *sb, struct vfsmount *mnt,
653 struct autofs_sb_info *sbi, int __user *arg)
654{
e5c85e1f 655 unsigned int how = 0;
ebc921ca 656
e5c85e1f 657 if (arg && get_user(how, arg))
ebc921ca
IK
658 return -EFAULT;
659
e5c85e1f 660 return autofs_do_expire_multi(sb, mnt, sbi, how);
ebc921ca 661}