fs: dcache scale subdirs
[linux-2.6-block.git] / fs / dcache.c
CommitLineData
1da177e4
LT
1/*
2 * fs/dcache.c
3 *
4 * Complete reimplementation
5 * (C) 1997 Thomas Schoebel-Theuer,
6 * with heavy changes by Linus Torvalds
7 */
8
9/*
10 * Notes on the allocation strategy:
11 *
12 * The dcache is a master of the icache - whenever a dcache entry
13 * exists, the inode will always exist. "iput()" is done either when
14 * the dcache entry is deleted or garbage collected.
15 */
16
1da177e4
LT
17#include <linux/syscalls.h>
18#include <linux/string.h>
19#include <linux/mm.h>
20#include <linux/fs.h>
7a91bf7f 21#include <linux/fsnotify.h>
1da177e4
LT
22#include <linux/slab.h>
23#include <linux/init.h>
1da177e4
LT
24#include <linux/hash.h>
25#include <linux/cache.h>
26#include <linux/module.h>
27#include <linux/mount.h>
28#include <linux/file.h>
29#include <asm/uaccess.h>
30#include <linux/security.h>
31#include <linux/seqlock.h>
32#include <linux/swap.h>
33#include <linux/bootmem.h>
5ad4e53b 34#include <linux/fs_struct.h>
613afbf8 35#include <linux/hardirq.h>
07f3f05c 36#include "internal.h"
1da177e4 37
789680d1
NP
38/*
39 * Usage:
23044507
NP
40 * dcache_hash_lock protects:
41 * - the dcache hash table, s_anon lists
42 * dcache_lru_lock protects:
43 * - the dcache lru lists and counters
44 * d_lock protects:
45 * - d_flags
46 * - d_name
47 * - d_lru
b7ab39f6 48 * - d_count
da502956 49 * - d_unhashed()
2fd6b7f5
NP
50 * - d_parent and d_subdirs
51 * - childrens' d_child and d_parent
789680d1
NP
52 *
53 * Ordering:
54 * dcache_lock
55 * dentry->d_lock
23044507 56 * dcache_lru_lock
789680d1
NP
57 * dcache_hash_lock
58 *
da502956
NP
59 * If there is an ancestor relationship:
60 * dentry->d_parent->...->d_parent->d_lock
61 * ...
62 * dentry->d_parent->d_lock
63 * dentry->d_lock
64 *
65 * If no ancestor relationship:
789680d1
NP
66 * if (dentry1 < dentry2)
67 * dentry1->d_lock
68 * dentry2->d_lock
69 */
fa3536cc 70int sysctl_vfs_cache_pressure __read_mostly = 100;
1da177e4
LT
71EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
72
789680d1 73static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_hash_lock);
23044507 74static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
789680d1 75__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
74c3cbe3 76__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
1da177e4
LT
77
78EXPORT_SYMBOL(dcache_lock);
79
e18b890b 80static struct kmem_cache *dentry_cache __read_mostly;
1da177e4
LT
81
82#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
83
84/*
85 * This is the single most critical data structure when it comes
86 * to the dcache: the hashtable for lookups. Somebody should try
87 * to make this good - I've just made it work.
88 *
89 * This hash-function tries to avoid losing too many bits of hash
90 * information, yet avoid using a prime hash-size or similar.
91 */
92#define D_HASHBITS d_hash_shift
93#define D_HASHMASK d_hash_mask
94
fa3536cc
ED
95static unsigned int d_hash_mask __read_mostly;
96static unsigned int d_hash_shift __read_mostly;
97static struct hlist_head *dentry_hashtable __read_mostly;
1da177e4
LT
98
99/* Statistics gathering. */
100struct dentry_stat_t dentry_stat = {
101 .age_limit = 45,
102};
103
3e880fb5 104static DEFINE_PER_CPU(unsigned int, nr_dentry);
312d3ca8
CH
105
106#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
3e880fb5
NP
107static int get_nr_dentry(void)
108{
109 int i;
110 int sum = 0;
111 for_each_possible_cpu(i)
112 sum += per_cpu(nr_dentry, i);
113 return sum < 0 ? 0 : sum;
114}
115
312d3ca8
CH
116int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
117 size_t *lenp, loff_t *ppos)
118{
3e880fb5 119 dentry_stat.nr_dentry = get_nr_dentry();
312d3ca8
CH
120 return proc_dointvec(table, write, buffer, lenp, ppos);
121}
122#endif
123
9c82ab9c 124static void __d_free(struct rcu_head *head)
1da177e4 125{
9c82ab9c
CH
126 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
127
fd217f4d 128 WARN_ON(!list_empty(&dentry->d_alias));
1da177e4
LT
129 if (dname_external(dentry))
130 kfree(dentry->d_name.name);
131 kmem_cache_free(dentry_cache, dentry);
132}
133
134/*
312d3ca8 135 * no dcache_lock, please.
1da177e4
LT
136 */
137static void d_free(struct dentry *dentry)
138{
b7ab39f6 139 BUG_ON(dentry->d_count);
3e880fb5 140 this_cpu_dec(nr_dentry);
1da177e4
LT
141 if (dentry->d_op && dentry->d_op->d_release)
142 dentry->d_op->d_release(dentry);
312d3ca8 143
b3423415 144 /* if dentry was never inserted into hash, immediate free is OK */
e8462caa 145 if (hlist_unhashed(&dentry->d_hash))
9c82ab9c 146 __d_free(&dentry->d_u.d_rcu);
b3423415 147 else
9c82ab9c 148 call_rcu(&dentry->d_u.d_rcu, __d_free);
1da177e4
LT
149}
150
151/*
152 * Release the dentry's inode, using the filesystem
153 * d_iput() operation if defined.
1da177e4 154 */
858119e1 155static void dentry_iput(struct dentry * dentry)
31f3e0b3
MS
156 __releases(dentry->d_lock)
157 __releases(dcache_lock)
1da177e4
LT
158{
159 struct inode *inode = dentry->d_inode;
160 if (inode) {
161 dentry->d_inode = NULL;
162 list_del_init(&dentry->d_alias);
163 spin_unlock(&dentry->d_lock);
164 spin_unlock(&dcache_lock);
f805fbda
LT
165 if (!inode->i_nlink)
166 fsnotify_inoderemove(inode);
1da177e4
LT
167 if (dentry->d_op && dentry->d_op->d_iput)
168 dentry->d_op->d_iput(dentry, inode);
169 else
170 iput(inode);
171 } else {
172 spin_unlock(&dentry->d_lock);
173 spin_unlock(&dcache_lock);
174 }
175}
176
da3bbdd4 177/*
23044507 178 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
da3bbdd4
KM
179 */
180static void dentry_lru_add(struct dentry *dentry)
181{
a4633357 182 if (list_empty(&dentry->d_lru)) {
23044507 183 spin_lock(&dcache_lru_lock);
a4633357
CH
184 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
185 dentry->d_sb->s_nr_dentry_unused++;
86c8749e 186 dentry_stat.nr_unused++;
23044507 187 spin_unlock(&dcache_lru_lock);
a4633357 188 }
da3bbdd4
KM
189}
190
23044507
NP
191static void __dentry_lru_del(struct dentry *dentry)
192{
193 list_del_init(&dentry->d_lru);
194 dentry->d_sb->s_nr_dentry_unused--;
195 dentry_stat.nr_unused--;
196}
197
da3bbdd4
KM
198static void dentry_lru_del(struct dentry *dentry)
199{
200 if (!list_empty(&dentry->d_lru)) {
23044507
NP
201 spin_lock(&dcache_lru_lock);
202 __dentry_lru_del(dentry);
203 spin_unlock(&dcache_lru_lock);
da3bbdd4
KM
204 }
205}
206
a4633357 207static void dentry_lru_move_tail(struct dentry *dentry)
da3bbdd4 208{
23044507 209 spin_lock(&dcache_lru_lock);
a4633357
CH
210 if (list_empty(&dentry->d_lru)) {
211 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
212 dentry->d_sb->s_nr_dentry_unused++;
86c8749e 213 dentry_stat.nr_unused++;
a4633357
CH
214 } else {
215 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
da3bbdd4 216 }
23044507 217 spin_unlock(&dcache_lru_lock);
da3bbdd4
KM
218}
219
d52b9086
MS
220/**
221 * d_kill - kill dentry and return parent
222 * @dentry: dentry to kill
223 *
31f3e0b3 224 * The dentry must already be unhashed and removed from the LRU.
d52b9086
MS
225 *
226 * If this is the root of the dentry tree, return NULL.
23044507 227 *
2fd6b7f5
NP
228 * dcache_lock and d_lock and d_parent->d_lock must be held by caller, and
229 * are dropped by d_kill.
d52b9086 230 */
2fd6b7f5 231static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
31f3e0b3 232 __releases(dentry->d_lock)
2fd6b7f5 233 __releases(parent->d_lock)
31f3e0b3 234 __releases(dcache_lock)
d52b9086 235{
d52b9086 236 list_del(&dentry->d_u.d_child);
2fd6b7f5
NP
237 if (parent)
238 spin_unlock(&parent->d_lock);
d52b9086 239 dentry_iput(dentry);
b7ab39f6
NP
240 /*
241 * dentry_iput drops the locks, at which point nobody (except
242 * transient RCU lookups) can reach this dentry.
243 */
d52b9086 244 d_free(dentry);
871c0067 245 return parent;
d52b9086
MS
246}
247
789680d1
NP
248/**
249 * d_drop - drop a dentry
250 * @dentry: dentry to drop
251 *
252 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
253 * be found through a VFS lookup any more. Note that this is different from
254 * deleting the dentry - d_delete will try to mark the dentry negative if
255 * possible, giving a successful _negative_ lookup, while d_drop will
256 * just make the cache lookup fail.
257 *
258 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
259 * reason (NFS timeouts or autofs deletes).
260 *
261 * __d_drop requires dentry->d_lock.
262 */
263void __d_drop(struct dentry *dentry)
264{
265 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
266 dentry->d_flags |= DCACHE_UNHASHED;
267 spin_lock(&dcache_hash_lock);
268 hlist_del_rcu(&dentry->d_hash);
269 spin_unlock(&dcache_hash_lock);
270 }
271}
272EXPORT_SYMBOL(__d_drop);
273
274void d_drop(struct dentry *dentry)
275{
276 spin_lock(&dcache_lock);
277 spin_lock(&dentry->d_lock);
278 __d_drop(dentry);
279 spin_unlock(&dentry->d_lock);
280 spin_unlock(&dcache_lock);
281}
282EXPORT_SYMBOL(d_drop);
283
1da177e4
LT
284/*
285 * This is dput
286 *
287 * This is complicated by the fact that we do not want to put
288 * dentries that are no longer on any hash chain on the unused
289 * list: we'd much rather just get rid of them immediately.
290 *
291 * However, that implies that we have to traverse the dentry
292 * tree upwards to the parents which might _also_ now be
293 * scheduled for deletion (it may have been only waiting for
294 * its last child to go away).
295 *
296 * This tail recursion is done by hand as we don't want to depend
297 * on the compiler to always get this right (gcc generally doesn't).
298 * Real recursion would eat up our stack space.
299 */
300
301/*
302 * dput - release a dentry
303 * @dentry: dentry to release
304 *
305 * Release a dentry. This will drop the usage count and if appropriate
306 * call the dentry unlink method as well as removing it from the queues and
307 * releasing its resources. If the parent dentries were scheduled for release
308 * they too may now get deleted.
309 *
310 * no dcache lock, please.
311 */
312
313void dput(struct dentry *dentry)
314{
2fd6b7f5 315 struct dentry *parent;
1da177e4
LT
316 if (!dentry)
317 return;
318
319repeat:
b7ab39f6 320 if (dentry->d_count == 1)
1da177e4 321 might_sleep();
1da177e4 322 spin_lock(&dentry->d_lock);
2fd6b7f5
NP
323 if (IS_ROOT(dentry))
324 parent = NULL;
325 else
326 parent = dentry->d_parent;
b7ab39f6
NP
327 if (dentry->d_count == 1) {
328 if (!spin_trylock(&dcache_lock)) {
329 /*
330 * Something of a livelock possibility we could avoid
331 * by taking dcache_lock and trying again, but we
332 * want to reduce dcache_lock anyway so this will
333 * get improved.
334 */
335 spin_unlock(&dentry->d_lock);
336 goto repeat;
337 }
2fd6b7f5
NP
338 if (parent && !spin_trylock(&parent->d_lock)) {
339 spin_unlock(&dentry->d_lock);
340 spin_unlock(&dcache_lock);
341 goto repeat;
342 }
b7ab39f6
NP
343 }
344 dentry->d_count--;
345 if (dentry->d_count) {
1da177e4 346 spin_unlock(&dentry->d_lock);
2fd6b7f5
NP
347 if (parent)
348 spin_unlock(&parent->d_lock);
1da177e4
LT
349 spin_unlock(&dcache_lock);
350 return;
351 }
352
353 /*
354 * AV: ->d_delete() is _NOT_ allowed to block now.
355 */
356 if (dentry->d_op && dentry->d_op->d_delete) {
357 if (dentry->d_op->d_delete(dentry))
358 goto unhash_it;
359 }
265ac902 360
1da177e4
LT
361 /* Unreachable? Get rid of it */
362 if (d_unhashed(dentry))
363 goto kill_it;
265ac902
NP
364
365 /* Otherwise leave it cached and ensure it's on the LRU */
366 dentry->d_flags |= DCACHE_REFERENCED;
a4633357 367 dentry_lru_add(dentry);
265ac902 368
1da177e4 369 spin_unlock(&dentry->d_lock);
2fd6b7f5
NP
370 if (parent)
371 spin_unlock(&parent->d_lock);
1da177e4
LT
372 spin_unlock(&dcache_lock);
373 return;
374
375unhash_it:
376 __d_drop(dentry);
d52b9086 377kill_it:
da3bbdd4
KM
378 /* if dentry was on the d_lru list delete it from there */
379 dentry_lru_del(dentry);
2fd6b7f5 380 dentry = d_kill(dentry, parent);
d52b9086
MS
381 if (dentry)
382 goto repeat;
1da177e4 383}
ec4f8605 384EXPORT_SYMBOL(dput);
1da177e4
LT
385
386/**
387 * d_invalidate - invalidate a dentry
388 * @dentry: dentry to invalidate
389 *
390 * Try to invalidate the dentry if it turns out to be
391 * possible. If there are other dentries that can be
392 * reached through this one we can't delete it and we
393 * return -EBUSY. On success we return 0.
394 *
395 * no dcache lock.
396 */
397
398int d_invalidate(struct dentry * dentry)
399{
400 /*
401 * If it's already been dropped, return OK.
402 */
403 spin_lock(&dcache_lock);
da502956 404 spin_lock(&dentry->d_lock);
1da177e4 405 if (d_unhashed(dentry)) {
da502956 406 spin_unlock(&dentry->d_lock);
1da177e4
LT
407 spin_unlock(&dcache_lock);
408 return 0;
409 }
410 /*
411 * Check whether to do a partial shrink_dcache
412 * to get rid of unused child entries.
413 */
414 if (!list_empty(&dentry->d_subdirs)) {
da502956 415 spin_unlock(&dentry->d_lock);
1da177e4
LT
416 spin_unlock(&dcache_lock);
417 shrink_dcache_parent(dentry);
418 spin_lock(&dcache_lock);
da502956 419 spin_lock(&dentry->d_lock);
1da177e4
LT
420 }
421
422 /*
423 * Somebody else still using it?
424 *
425 * If it's a directory, we can't drop it
426 * for fear of somebody re-populating it
427 * with children (even though dropping it
428 * would make it unreachable from the root,
429 * we might still populate it if it was a
430 * working directory or similar).
431 */
b7ab39f6 432 if (dentry->d_count > 1) {
1da177e4
LT
433 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
434 spin_unlock(&dentry->d_lock);
435 spin_unlock(&dcache_lock);
436 return -EBUSY;
437 }
438 }
439
440 __d_drop(dentry);
441 spin_unlock(&dentry->d_lock);
442 spin_unlock(&dcache_lock);
443 return 0;
444}
ec4f8605 445EXPORT_SYMBOL(d_invalidate);
1da177e4 446
b7ab39f6 447/* This must be called with dcache_lock and d_lock held */
23044507
NP
448static inline struct dentry * __dget_locked_dlock(struct dentry *dentry)
449{
b7ab39f6 450 dentry->d_count++;
23044507
NP
451 dentry_lru_del(dentry);
452 return dentry;
453}
454
b7ab39f6 455/* This should be called _only_ with dcache_lock held */
1da177e4
LT
456static inline struct dentry * __dget_locked(struct dentry *dentry)
457{
23044507 458 spin_lock(&dentry->d_lock);
b7ab39f6 459 __dget_locked_dlock(dentry);
23044507 460 spin_unlock(&dentry->d_lock);
1da177e4
LT
461 return dentry;
462}
463
b7ab39f6
NP
464struct dentry * dget_locked_dlock(struct dentry *dentry)
465{
466 return __dget_locked_dlock(dentry);
467}
468
1da177e4
LT
469struct dentry * dget_locked(struct dentry *dentry)
470{
471 return __dget_locked(dentry);
472}
ec4f8605 473EXPORT_SYMBOL(dget_locked);
1da177e4 474
b7ab39f6
NP
475struct dentry *dget_parent(struct dentry *dentry)
476{
477 struct dentry *ret;
478
479repeat:
480 spin_lock(&dentry->d_lock);
481 ret = dentry->d_parent;
482 if (!ret)
483 goto out;
484 if (dentry == ret) {
485 ret->d_count++;
486 goto out;
487 }
488 if (!spin_trylock(&ret->d_lock)) {
489 spin_unlock(&dentry->d_lock);
490 cpu_relax();
491 goto repeat;
492 }
493 BUG_ON(!ret->d_count);
494 ret->d_count++;
495 spin_unlock(&ret->d_lock);
496out:
497 spin_unlock(&dentry->d_lock);
498 return ret;
499}
500EXPORT_SYMBOL(dget_parent);
501
1da177e4
LT
502/**
503 * d_find_alias - grab a hashed alias of inode
504 * @inode: inode in question
505 * @want_discon: flag, used by d_splice_alias, to request
506 * that only a DISCONNECTED alias be returned.
507 *
508 * If inode has a hashed alias, or is a directory and has any alias,
509 * acquire the reference to alias and return it. Otherwise return NULL.
510 * Notice that if inode is a directory there can be only one alias and
511 * it can be unhashed only if it has no children, or if it is the root
512 * of a filesystem.
513 *
21c0d8fd 514 * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
1da177e4 515 * any other hashed alias over that one unless @want_discon is set,
21c0d8fd 516 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
1da177e4 517 */
da502956 518static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
1da177e4 519{
da502956 520 struct dentry *alias, *discon_alias;
1da177e4 521
da502956
NP
522again:
523 discon_alias = NULL;
524 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
525 spin_lock(&alias->d_lock);
1da177e4 526 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
21c0d8fd 527 if (IS_ROOT(alias) &&
da502956 528 (alias->d_flags & DCACHE_DISCONNECTED)) {
1da177e4 529 discon_alias = alias;
da502956
NP
530 } else if (!want_discon) {
531 __dget_locked_dlock(alias);
532 spin_unlock(&alias->d_lock);
533 return alias;
534 }
535 }
536 spin_unlock(&alias->d_lock);
537 }
538 if (discon_alias) {
539 alias = discon_alias;
540 spin_lock(&alias->d_lock);
541 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
542 if (IS_ROOT(alias) &&
543 (alias->d_flags & DCACHE_DISCONNECTED)) {
544 __dget_locked_dlock(alias);
545 spin_unlock(&alias->d_lock);
1da177e4
LT
546 return alias;
547 }
548 }
da502956
NP
549 spin_unlock(&alias->d_lock);
550 goto again;
1da177e4 551 }
da502956 552 return NULL;
1da177e4
LT
553}
554
da502956 555struct dentry *d_find_alias(struct inode *inode)
1da177e4 556{
214fda1f
DH
557 struct dentry *de = NULL;
558
559 if (!list_empty(&inode->i_dentry)) {
560 spin_lock(&dcache_lock);
561 de = __d_find_alias(inode, 0);
562 spin_unlock(&dcache_lock);
563 }
1da177e4
LT
564 return de;
565}
ec4f8605 566EXPORT_SYMBOL(d_find_alias);
1da177e4
LT
567
568/*
569 * Try to kill dentries associated with this inode.
570 * WARNING: you must own a reference to inode.
571 */
572void d_prune_aliases(struct inode *inode)
573{
0cdca3f9 574 struct dentry *dentry;
1da177e4
LT
575restart:
576 spin_lock(&dcache_lock);
0cdca3f9 577 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
1da177e4 578 spin_lock(&dentry->d_lock);
b7ab39f6 579 if (!dentry->d_count) {
23044507 580 __dget_locked_dlock(dentry);
1da177e4
LT
581 __d_drop(dentry);
582 spin_unlock(&dentry->d_lock);
583 spin_unlock(&dcache_lock);
584 dput(dentry);
585 goto restart;
586 }
587 spin_unlock(&dentry->d_lock);
588 }
589 spin_unlock(&dcache_lock);
590}
ec4f8605 591EXPORT_SYMBOL(d_prune_aliases);
1da177e4
LT
592
593/*
d702ccb3
AM
594 * Throw away a dentry - free the inode, dput the parent. This requires that
595 * the LRU list has already been removed.
596 *
85864e10
MS
597 * Try to prune ancestors as well. This is necessary to prevent
598 * quadratic behavior of shrink_dcache_parent(), but is also expected
599 * to be beneficial in reducing dentry cache fragmentation.
1da177e4 600 */
2fd6b7f5 601static void prune_one_dentry(struct dentry *dentry, struct dentry *parent)
31f3e0b3 602 __releases(dentry->d_lock)
2fd6b7f5 603 __releases(parent->d_lock)
31f3e0b3 604 __releases(dcache_lock)
1da177e4 605{
1da177e4 606 __d_drop(dentry);
2fd6b7f5 607 dentry = d_kill(dentry, parent);
d52b9086
MS
608
609 /*
610 * Prune ancestors. Locking is simpler than in dput(),
611 * because dcache_lock needs to be taken anyway.
612 */
d52b9086 613 while (dentry) {
23044507 614 spin_lock(&dcache_lock);
2fd6b7f5 615again:
b7ab39f6 616 spin_lock(&dentry->d_lock);
2fd6b7f5
NP
617 if (IS_ROOT(dentry))
618 parent = NULL;
619 else
620 parent = dentry->d_parent;
621 if (parent && !spin_trylock(&parent->d_lock)) {
622 spin_unlock(&dentry->d_lock);
623 goto again;
624 }
b7ab39f6
NP
625 dentry->d_count--;
626 if (dentry->d_count) {
2fd6b7f5
NP
627 if (parent)
628 spin_unlock(&parent->d_lock);
b7ab39f6 629 spin_unlock(&dentry->d_lock);
23044507 630 spin_unlock(&dcache_lock);
d52b9086 631 return;
23044507 632 }
d52b9086 633
a4633357 634 dentry_lru_del(dentry);
d52b9086 635 __d_drop(dentry);
2fd6b7f5 636 dentry = d_kill(dentry, parent);
d52b9086 637 }
1da177e4
LT
638}
639
3049cfe2 640static void shrink_dentry_list(struct list_head *list)
1da177e4 641{
da3bbdd4 642 struct dentry *dentry;
da3bbdd4 643
3049cfe2 644 while (!list_empty(list)) {
2fd6b7f5
NP
645 struct dentry *parent;
646
3049cfe2 647 dentry = list_entry(list->prev, struct dentry, d_lru);
23044507
NP
648
649 if (!spin_trylock(&dentry->d_lock)) {
2fd6b7f5 650relock:
23044507
NP
651 spin_unlock(&dcache_lru_lock);
652 cpu_relax();
653 spin_lock(&dcache_lru_lock);
654 continue;
655 }
656
1da177e4
LT
657 /*
658 * We found an inuse dentry which was not removed from
da3bbdd4
KM
659 * the LRU because of laziness during lookup. Do not free
660 * it - just keep it off the LRU list.
1da177e4 661 */
b7ab39f6 662 if (dentry->d_count) {
2fd6b7f5 663 __dentry_lru_del(dentry);
da3bbdd4 664 spin_unlock(&dentry->d_lock);
1da177e4
LT
665 continue;
666 }
2fd6b7f5
NP
667 if (IS_ROOT(dentry))
668 parent = NULL;
669 else
670 parent = dentry->d_parent;
671 if (parent && !spin_trylock(&parent->d_lock)) {
672 spin_unlock(&dentry->d_lock);
673 goto relock;
674 }
675 __dentry_lru_del(dentry);
23044507
NP
676 spin_unlock(&dcache_lru_lock);
677
2fd6b7f5 678 prune_one_dentry(dentry, parent);
23044507
NP
679 /* dcache_lock and dentry->d_lock dropped */
680 spin_lock(&dcache_lock);
681 spin_lock(&dcache_lru_lock);
da3bbdd4 682 }
3049cfe2
CH
683}
684
685/**
686 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
687 * @sb: superblock to shrink dentry LRU.
688 * @count: number of entries to prune
689 * @flags: flags to control the dentry processing
690 *
691 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
692 */
693static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
694{
695 /* called from prune_dcache() and shrink_dcache_parent() */
696 struct dentry *dentry;
697 LIST_HEAD(referenced);
698 LIST_HEAD(tmp);
699 int cnt = *count;
700
701 spin_lock(&dcache_lock);
23044507
NP
702relock:
703 spin_lock(&dcache_lru_lock);
3049cfe2
CH
704 while (!list_empty(&sb->s_dentry_lru)) {
705 dentry = list_entry(sb->s_dentry_lru.prev,
706 struct dentry, d_lru);
707 BUG_ON(dentry->d_sb != sb);
708
23044507
NP
709 if (!spin_trylock(&dentry->d_lock)) {
710 spin_unlock(&dcache_lru_lock);
711 cpu_relax();
712 goto relock;
713 }
714
3049cfe2
CH
715 /*
716 * If we are honouring the DCACHE_REFERENCED flag and the
717 * dentry has this flag set, don't free it. Clear the flag
718 * and put it back on the LRU.
719 */
23044507
NP
720 if (flags & DCACHE_REFERENCED &&
721 dentry->d_flags & DCACHE_REFERENCED) {
722 dentry->d_flags &= ~DCACHE_REFERENCED;
723 list_move(&dentry->d_lru, &referenced);
3049cfe2 724 spin_unlock(&dentry->d_lock);
23044507
NP
725 } else {
726 list_move_tail(&dentry->d_lru, &tmp);
727 spin_unlock(&dentry->d_lock);
728 if (!--cnt)
729 break;
3049cfe2 730 }
23044507 731 /* XXX: re-add cond_resched_lock when dcache_lock goes away */
3049cfe2
CH
732 }
733
734 *count = cnt;
735 shrink_dentry_list(&tmp);
736
da3bbdd4
KM
737 if (!list_empty(&referenced))
738 list_splice(&referenced, &sb->s_dentry_lru);
23044507 739 spin_unlock(&dcache_lru_lock);
da3bbdd4 740 spin_unlock(&dcache_lock);
3049cfe2 741
da3bbdd4
KM
742}
743
744/**
745 * prune_dcache - shrink the dcache
746 * @count: number of entries to try to free
747 *
748 * Shrink the dcache. This is done when we need more memory, or simply when we
749 * need to unmount something (at which point we need to unuse all dentries).
750 *
751 * This function may fail to free any resources if all the dentries are in use.
752 */
753static void prune_dcache(int count)
754{
dca33252 755 struct super_block *sb, *p = NULL;
da3bbdd4 756 int w_count;
86c8749e 757 int unused = dentry_stat.nr_unused;
da3bbdd4
KM
758 int prune_ratio;
759 int pruned;
760
761 if (unused == 0 || count == 0)
762 return;
763 spin_lock(&dcache_lock);
da3bbdd4
KM
764 if (count >= unused)
765 prune_ratio = 1;
766 else
767 prune_ratio = unused / count;
768 spin_lock(&sb_lock);
dca33252 769 list_for_each_entry(sb, &super_blocks, s_list) {
551de6f3
AV
770 if (list_empty(&sb->s_instances))
771 continue;
da3bbdd4 772 if (sb->s_nr_dentry_unused == 0)
1da177e4 773 continue;
da3bbdd4
KM
774 sb->s_count++;
775 /* Now, we reclaim unused dentrins with fairness.
776 * We reclaim them same percentage from each superblock.
777 * We calculate number of dentries to scan on this sb
778 * as follows, but the implementation is arranged to avoid
779 * overflows:
780 * number of dentries to scan on this sb =
781 * count * (number of dentries on this sb /
782 * number of dentries in the machine)
0feae5c4 783 */
da3bbdd4
KM
784 spin_unlock(&sb_lock);
785 if (prune_ratio != 1)
786 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
787 else
788 w_count = sb->s_nr_dentry_unused;
789 pruned = w_count;
0feae5c4 790 /*
da3bbdd4
KM
791 * We need to be sure this filesystem isn't being unmounted,
792 * otherwise we could race with generic_shutdown_super(), and
793 * end up holding a reference to an inode while the filesystem
794 * is unmounted. So we try to get s_umount, and make sure
795 * s_root isn't NULL.
0feae5c4 796 */
da3bbdd4
KM
797 if (down_read_trylock(&sb->s_umount)) {
798 if ((sb->s_root != NULL) &&
799 (!list_empty(&sb->s_dentry_lru))) {
800 spin_unlock(&dcache_lock);
801 __shrink_dcache_sb(sb, &w_count,
802 DCACHE_REFERENCED);
803 pruned -= w_count;
804 spin_lock(&dcache_lock);
0feae5c4 805 }
da3bbdd4 806 up_read(&sb->s_umount);
0feae5c4 807 }
da3bbdd4 808 spin_lock(&sb_lock);
dca33252
AV
809 if (p)
810 __put_super(p);
da3bbdd4 811 count -= pruned;
dca33252 812 p = sb;
79893c17
AV
813 /* more work left to do? */
814 if (count <= 0)
815 break;
1da177e4 816 }
dca33252
AV
817 if (p)
818 __put_super(p);
da3bbdd4 819 spin_unlock(&sb_lock);
1da177e4
LT
820 spin_unlock(&dcache_lock);
821}
822
1da177e4
LT
823/**
824 * shrink_dcache_sb - shrink dcache for a superblock
825 * @sb: superblock
826 *
3049cfe2
CH
827 * Shrink the dcache for the specified super block. This is used to free
828 * the dcache before unmounting a file system.
1da177e4 829 */
3049cfe2 830void shrink_dcache_sb(struct super_block *sb)
1da177e4 831{
3049cfe2
CH
832 LIST_HEAD(tmp);
833
834 spin_lock(&dcache_lock);
23044507 835 spin_lock(&dcache_lru_lock);
3049cfe2
CH
836 while (!list_empty(&sb->s_dentry_lru)) {
837 list_splice_init(&sb->s_dentry_lru, &tmp);
838 shrink_dentry_list(&tmp);
839 }
23044507 840 spin_unlock(&dcache_lru_lock);
3049cfe2 841 spin_unlock(&dcache_lock);
1da177e4 842}
ec4f8605 843EXPORT_SYMBOL(shrink_dcache_sb);
1da177e4 844
c636ebdb
DH
845/*
846 * destroy a single subtree of dentries for unmount
847 * - see the comments on shrink_dcache_for_umount() for a description of the
848 * locking
849 */
850static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
851{
852 struct dentry *parent;
f8713576 853 unsigned detached = 0;
c636ebdb
DH
854
855 BUG_ON(!IS_ROOT(dentry));
856
857 /* detach this root from the system */
858 spin_lock(&dcache_lock);
23044507 859 spin_lock(&dentry->d_lock);
a4633357 860 dentry_lru_del(dentry);
c636ebdb 861 __d_drop(dentry);
da502956 862 spin_unlock(&dentry->d_lock);
c636ebdb
DH
863 spin_unlock(&dcache_lock);
864
865 for (;;) {
866 /* descend to the first leaf in the current subtree */
867 while (!list_empty(&dentry->d_subdirs)) {
868 struct dentry *loop;
869
870 /* this is a branch with children - detach all of them
871 * from the system in one go */
872 spin_lock(&dcache_lock);
2fd6b7f5 873 spin_lock(&dentry->d_lock);
c636ebdb
DH
874 list_for_each_entry(loop, &dentry->d_subdirs,
875 d_u.d_child) {
2fd6b7f5
NP
876 spin_lock_nested(&loop->d_lock,
877 DENTRY_D_LOCK_NESTED);
a4633357 878 dentry_lru_del(loop);
c636ebdb 879 __d_drop(loop);
da502956 880 spin_unlock(&loop->d_lock);
c636ebdb 881 }
2fd6b7f5 882 spin_unlock(&dentry->d_lock);
c636ebdb
DH
883 spin_unlock(&dcache_lock);
884
885 /* move to the first child */
886 dentry = list_entry(dentry->d_subdirs.next,
887 struct dentry, d_u.d_child);
888 }
889
890 /* consume the dentries from this leaf up through its parents
891 * until we find one with children or run out altogether */
892 do {
893 struct inode *inode;
894
b7ab39f6 895 if (dentry->d_count != 0) {
c636ebdb
DH
896 printk(KERN_ERR
897 "BUG: Dentry %p{i=%lx,n=%s}"
898 " still in use (%d)"
899 " [unmount of %s %s]\n",
900 dentry,
901 dentry->d_inode ?
902 dentry->d_inode->i_ino : 0UL,
903 dentry->d_name.name,
b7ab39f6 904 dentry->d_count,
c636ebdb
DH
905 dentry->d_sb->s_type->name,
906 dentry->d_sb->s_id);
907 BUG();
908 }
909
2fd6b7f5 910 if (IS_ROOT(dentry)) {
c636ebdb 911 parent = NULL;
2fd6b7f5
NP
912 list_del(&dentry->d_u.d_child);
913 } else {
871c0067 914 parent = dentry->d_parent;
b7ab39f6
NP
915 spin_lock(&parent->d_lock);
916 parent->d_count--;
2fd6b7f5 917 list_del(&dentry->d_u.d_child);
b7ab39f6 918 spin_unlock(&parent->d_lock);
871c0067 919 }
c636ebdb 920
f8713576 921 detached++;
c636ebdb
DH
922
923 inode = dentry->d_inode;
924 if (inode) {
925 dentry->d_inode = NULL;
926 list_del_init(&dentry->d_alias);
927 if (dentry->d_op && dentry->d_op->d_iput)
928 dentry->d_op->d_iput(dentry, inode);
929 else
930 iput(inode);
931 }
932
933 d_free(dentry);
934
935 /* finished when we fall off the top of the tree,
936 * otherwise we ascend to the parent and move to the
937 * next sibling if there is one */
938 if (!parent)
312d3ca8 939 return;
c636ebdb 940 dentry = parent;
c636ebdb
DH
941 } while (list_empty(&dentry->d_subdirs));
942
943 dentry = list_entry(dentry->d_subdirs.next,
944 struct dentry, d_u.d_child);
945 }
946}
947
948/*
949 * destroy the dentries attached to a superblock on unmounting
950 * - we don't need to use dentry->d_lock, and only need dcache_lock when
951 * removing the dentry from the system lists and hashes because:
952 * - the superblock is detached from all mountings and open files, so the
953 * dentry trees will not be rearranged by the VFS
954 * - s_umount is write-locked, so the memory pressure shrinker will ignore
955 * any dentries belonging to this superblock that it comes across
956 * - the filesystem itself is no longer permitted to rearrange the dentries
957 * in this superblock
958 */
959void shrink_dcache_for_umount(struct super_block *sb)
960{
961 struct dentry *dentry;
962
963 if (down_read_trylock(&sb->s_umount))
964 BUG();
965
966 dentry = sb->s_root;
967 sb->s_root = NULL;
b7ab39f6
NP
968 spin_lock(&dentry->d_lock);
969 dentry->d_count--;
970 spin_unlock(&dentry->d_lock);
c636ebdb
DH
971 shrink_dcache_for_umount_subtree(dentry);
972
973 while (!hlist_empty(&sb->s_anon)) {
974 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
975 shrink_dcache_for_umount_subtree(dentry);
976 }
977}
978
1da177e4
LT
979/*
980 * Search for at least 1 mount point in the dentry's subdirs.
981 * We descend to the next level whenever the d_subdirs
982 * list is non-empty and continue searching.
983 */
984
985/**
986 * have_submounts - check for mounts over a dentry
987 * @parent: dentry to check.
988 *
989 * Return true if the parent or its subdirectories contain
990 * a mount point
991 */
992
993int have_submounts(struct dentry *parent)
994{
995 struct dentry *this_parent = parent;
996 struct list_head *next;
997
998 spin_lock(&dcache_lock);
999 if (d_mountpoint(parent))
1000 goto positive;
2fd6b7f5 1001 spin_lock(&this_parent->d_lock);
1da177e4
LT
1002repeat:
1003 next = this_parent->d_subdirs.next;
1004resume:
1005 while (next != &this_parent->d_subdirs) {
1006 struct list_head *tmp = next;
5160ee6f 1007 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1da177e4 1008 next = tmp->next;
2fd6b7f5
NP
1009
1010 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1da177e4 1011 /* Have we found a mount point ? */
2fd6b7f5
NP
1012 if (d_mountpoint(dentry)) {
1013 spin_unlock(&dentry->d_lock);
1014 spin_unlock(&this_parent->d_lock);
1da177e4 1015 goto positive;
2fd6b7f5 1016 }
1da177e4 1017 if (!list_empty(&dentry->d_subdirs)) {
2fd6b7f5
NP
1018 spin_unlock(&this_parent->d_lock);
1019 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1da177e4 1020 this_parent = dentry;
2fd6b7f5 1021 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1da177e4
LT
1022 goto repeat;
1023 }
2fd6b7f5 1024 spin_unlock(&dentry->d_lock);
1da177e4
LT
1025 }
1026 /*
1027 * All done at this level ... ascend and resume the search.
1028 */
1029 if (this_parent != parent) {
5160ee6f 1030 next = this_parent->d_u.d_child.next;
2fd6b7f5 1031 spin_unlock(&this_parent->d_lock);
1da177e4 1032 this_parent = this_parent->d_parent;
2fd6b7f5 1033 spin_lock(&this_parent->d_lock);
1da177e4
LT
1034 goto resume;
1035 }
2fd6b7f5 1036 spin_unlock(&this_parent->d_lock);
1da177e4
LT
1037 spin_unlock(&dcache_lock);
1038 return 0; /* No mount points found in tree */
1039positive:
1040 spin_unlock(&dcache_lock);
1041 return 1;
1042}
ec4f8605 1043EXPORT_SYMBOL(have_submounts);
1da177e4
LT
1044
1045/*
1046 * Search the dentry child list for the specified parent,
1047 * and move any unused dentries to the end of the unused
1048 * list for prune_dcache(). We descend to the next level
1049 * whenever the d_subdirs list is non-empty and continue
1050 * searching.
1051 *
1052 * It returns zero iff there are no unused children,
1053 * otherwise it returns the number of children moved to
1054 * the end of the unused list. This may not be the total
1055 * number of unused children, because select_parent can
1056 * drop the lock and return early due to latency
1057 * constraints.
1058 */
1059static int select_parent(struct dentry * parent)
1060{
1061 struct dentry *this_parent = parent;
1062 struct list_head *next;
1063 int found = 0;
1064
1065 spin_lock(&dcache_lock);
2fd6b7f5 1066 spin_lock(&this_parent->d_lock);
1da177e4
LT
1067repeat:
1068 next = this_parent->d_subdirs.next;
1069resume:
1070 while (next != &this_parent->d_subdirs) {
1071 struct list_head *tmp = next;
5160ee6f 1072 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1da177e4 1073 next = tmp->next;
2fd6b7f5 1074 BUG_ON(this_parent == dentry);
1da177e4 1075
2fd6b7f5 1076 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
23044507 1077
1da177e4
LT
1078 /*
1079 * move only zero ref count dentries to the end
1080 * of the unused list for prune_dcache
1081 */
b7ab39f6 1082 if (!dentry->d_count) {
a4633357 1083 dentry_lru_move_tail(dentry);
1da177e4 1084 found++;
a4633357
CH
1085 } else {
1086 dentry_lru_del(dentry);
1da177e4
LT
1087 }
1088
1089 /*
1090 * We can return to the caller if we have found some (this
1091 * ensures forward progress). We'll be coming back to find
1092 * the rest.
1093 */
2fd6b7f5
NP
1094 if (found && need_resched()) {
1095 spin_unlock(&dentry->d_lock);
1da177e4 1096 goto out;
2fd6b7f5 1097 }
1da177e4
LT
1098
1099 /*
1100 * Descend a level if the d_subdirs list is non-empty.
1101 */
1102 if (!list_empty(&dentry->d_subdirs)) {
2fd6b7f5
NP
1103 spin_unlock(&this_parent->d_lock);
1104 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1da177e4 1105 this_parent = dentry;
2fd6b7f5 1106 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1da177e4
LT
1107 goto repeat;
1108 }
2fd6b7f5
NP
1109
1110 spin_unlock(&dentry->d_lock);
1da177e4
LT
1111 }
1112 /*
1113 * All done at this level ... ascend and resume the search.
1114 */
1115 if (this_parent != parent) {
2fd6b7f5 1116 struct dentry *tmp;
5160ee6f 1117 next = this_parent->d_u.d_child.next;
2fd6b7f5
NP
1118 tmp = this_parent->d_parent;
1119 spin_unlock(&this_parent->d_lock);
1120 BUG_ON(tmp == this_parent);
1121 this_parent = tmp;
1122 spin_lock(&this_parent->d_lock);
1da177e4
LT
1123 goto resume;
1124 }
1125out:
2fd6b7f5 1126 spin_unlock(&this_parent->d_lock);
1da177e4
LT
1127 spin_unlock(&dcache_lock);
1128 return found;
1129}
1130
1131/**
1132 * shrink_dcache_parent - prune dcache
1133 * @parent: parent of entries to prune
1134 *
1135 * Prune the dcache to remove unused children of the parent dentry.
1136 */
1137
1138void shrink_dcache_parent(struct dentry * parent)
1139{
da3bbdd4 1140 struct super_block *sb = parent->d_sb;
1da177e4
LT
1141 int found;
1142
1143 while ((found = select_parent(parent)) != 0)
da3bbdd4 1144 __shrink_dcache_sb(sb, &found, 0);
1da177e4 1145}
ec4f8605 1146EXPORT_SYMBOL(shrink_dcache_parent);
1da177e4 1147
1da177e4
LT
1148/*
1149 * Scan `nr' dentries and return the number which remain.
1150 *
1151 * We need to avoid reentering the filesystem if the caller is performing a
1152 * GFP_NOFS allocation attempt. One example deadlock is:
1153 *
1154 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1155 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1156 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1157 *
1158 * In this case we return -1 to tell the caller that we baled.
1159 */
7f8275d0 1160static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1da177e4
LT
1161{
1162 if (nr) {
1163 if (!(gfp_mask & __GFP_FS))
1164 return -1;
da3bbdd4 1165 prune_dcache(nr);
1da177e4 1166 }
312d3ca8 1167
86c8749e 1168 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1da177e4
LT
1169}
1170
8e1f936b
RR
1171static struct shrinker dcache_shrinker = {
1172 .shrink = shrink_dcache_memory,
1173 .seeks = DEFAULT_SEEKS,
1174};
1175
1da177e4
LT
1176/**
1177 * d_alloc - allocate a dcache entry
1178 * @parent: parent of entry to allocate
1179 * @name: qstr of the name
1180 *
1181 * Allocates a dentry. It returns %NULL if there is insufficient memory
1182 * available. On a success the dentry is returned. The name passed in is
1183 * copied and the copy passed in may be reused after this call.
1184 */
1185
1186struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1187{
1188 struct dentry *dentry;
1189 char *dname;
1190
e12ba74d 1191 dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1da177e4
LT
1192 if (!dentry)
1193 return NULL;
1194
1195 if (name->len > DNAME_INLINE_LEN-1) {
1196 dname = kmalloc(name->len + 1, GFP_KERNEL);
1197 if (!dname) {
1198 kmem_cache_free(dentry_cache, dentry);
1199 return NULL;
1200 }
1201 } else {
1202 dname = dentry->d_iname;
1203 }
1204 dentry->d_name.name = dname;
1205
1206 dentry->d_name.len = name->len;
1207 dentry->d_name.hash = name->hash;
1208 memcpy(dname, name->name, name->len);
1209 dname[name->len] = 0;
1210
b7ab39f6 1211 dentry->d_count = 1;
1da177e4
LT
1212 dentry->d_flags = DCACHE_UNHASHED;
1213 spin_lock_init(&dentry->d_lock);
1214 dentry->d_inode = NULL;
1215 dentry->d_parent = NULL;
1216 dentry->d_sb = NULL;
1217 dentry->d_op = NULL;
1218 dentry->d_fsdata = NULL;
1219 dentry->d_mounted = 0;
1da177e4
LT
1220 INIT_HLIST_NODE(&dentry->d_hash);
1221 INIT_LIST_HEAD(&dentry->d_lru);
1222 INIT_LIST_HEAD(&dentry->d_subdirs);
1223 INIT_LIST_HEAD(&dentry->d_alias);
2fd6b7f5 1224 INIT_LIST_HEAD(&dentry->d_u.d_child);
1da177e4
LT
1225
1226 if (parent) {
2fd6b7f5
NP
1227 spin_lock(&dcache_lock);
1228 spin_lock(&parent->d_lock);
1229 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1230 dentry->d_parent = dget_dlock(parent);
1da177e4 1231 dentry->d_sb = parent->d_sb;
5160ee6f 1232 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
2fd6b7f5
NP
1233 spin_unlock(&dentry->d_lock);
1234 spin_unlock(&parent->d_lock);
1235 spin_unlock(&dcache_lock);
1236 }
1da177e4 1237
3e880fb5 1238 this_cpu_inc(nr_dentry);
312d3ca8 1239
1da177e4
LT
1240 return dentry;
1241}
ec4f8605 1242EXPORT_SYMBOL(d_alloc);
1da177e4
LT
1243
1244struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1245{
1246 struct qstr q;
1247
1248 q.name = name;
1249 q.len = strlen(name);
1250 q.hash = full_name_hash(q.name, q.len);
1251 return d_alloc(parent, &q);
1252}
ef26ca97 1253EXPORT_SYMBOL(d_alloc_name);
1da177e4 1254
360da900
OH
1255/* the caller must hold dcache_lock */
1256static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1257{
1258 if (inode)
1259 list_add(&dentry->d_alias, &inode->i_dentry);
1260 dentry->d_inode = inode;
1261 fsnotify_d_instantiate(dentry, inode);
1262}
1263
1da177e4
LT
1264/**
1265 * d_instantiate - fill in inode information for a dentry
1266 * @entry: dentry to complete
1267 * @inode: inode to attach to this dentry
1268 *
1269 * Fill in inode information in the entry.
1270 *
1271 * This turns negative dentries into productive full members
1272 * of society.
1273 *
1274 * NOTE! This assumes that the inode count has been incremented
1275 * (or otherwise set) by the caller to indicate that it is now
1276 * in use by the dcache.
1277 */
1278
1279void d_instantiate(struct dentry *entry, struct inode * inode)
1280{
28133c7b 1281 BUG_ON(!list_empty(&entry->d_alias));
1da177e4 1282 spin_lock(&dcache_lock);
360da900 1283 __d_instantiate(entry, inode);
1da177e4
LT
1284 spin_unlock(&dcache_lock);
1285 security_d_instantiate(entry, inode);
1286}
ec4f8605 1287EXPORT_SYMBOL(d_instantiate);
1da177e4
LT
1288
1289/**
1290 * d_instantiate_unique - instantiate a non-aliased dentry
1291 * @entry: dentry to instantiate
1292 * @inode: inode to attach to this dentry
1293 *
1294 * Fill in inode information in the entry. On success, it returns NULL.
1295 * If an unhashed alias of "entry" already exists, then we return the
e866cfa9 1296 * aliased dentry instead and drop one reference to inode.
1da177e4
LT
1297 *
1298 * Note that in order to avoid conflicts with rename() etc, the caller
1299 * had better be holding the parent directory semaphore.
e866cfa9
OD
1300 *
1301 * This also assumes that the inode count has been incremented
1302 * (or otherwise set) by the caller to indicate that it is now
1303 * in use by the dcache.
1da177e4 1304 */
770bfad8
DH
1305static struct dentry *__d_instantiate_unique(struct dentry *entry,
1306 struct inode *inode)
1da177e4
LT
1307{
1308 struct dentry *alias;
1309 int len = entry->d_name.len;
1310 const char *name = entry->d_name.name;
1311 unsigned int hash = entry->d_name.hash;
1312
770bfad8 1313 if (!inode) {
360da900 1314 __d_instantiate(entry, NULL);
770bfad8
DH
1315 return NULL;
1316 }
1317
1da177e4
LT
1318 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1319 struct qstr *qstr = &alias->d_name;
1320
1321 if (qstr->hash != hash)
1322 continue;
1323 if (alias->d_parent != entry->d_parent)
1324 continue;
1325 if (qstr->len != len)
1326 continue;
1327 if (memcmp(qstr->name, name, len))
1328 continue;
1329 dget_locked(alias);
1da177e4
LT
1330 return alias;
1331 }
770bfad8 1332
360da900 1333 __d_instantiate(entry, inode);
1da177e4
LT
1334 return NULL;
1335}
770bfad8
DH
1336
1337struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1338{
1339 struct dentry *result;
1340
1341 BUG_ON(!list_empty(&entry->d_alias));
1342
1343 spin_lock(&dcache_lock);
1344 result = __d_instantiate_unique(entry, inode);
1345 spin_unlock(&dcache_lock);
1346
1347 if (!result) {
1348 security_d_instantiate(entry, inode);
1349 return NULL;
1350 }
1351
1352 BUG_ON(!d_unhashed(result));
1353 iput(inode);
1354 return result;
1355}
1356
1da177e4
LT
1357EXPORT_SYMBOL(d_instantiate_unique);
1358
1359/**
1360 * d_alloc_root - allocate root dentry
1361 * @root_inode: inode to allocate the root for
1362 *
1363 * Allocate a root ("/") dentry for the inode given. The inode is
1364 * instantiated and returned. %NULL is returned if there is insufficient
1365 * memory or the inode passed is %NULL.
1366 */
1367
1368struct dentry * d_alloc_root(struct inode * root_inode)
1369{
1370 struct dentry *res = NULL;
1371
1372 if (root_inode) {
1373 static const struct qstr name = { .name = "/", .len = 1 };
1374
1375 res = d_alloc(NULL, &name);
1376 if (res) {
1377 res->d_sb = root_inode->i_sb;
1378 res->d_parent = res;
1379 d_instantiate(res, root_inode);
1380 }
1381 }
1382 return res;
1383}
ec4f8605 1384EXPORT_SYMBOL(d_alloc_root);
1da177e4
LT
1385
1386static inline struct hlist_head *d_hash(struct dentry *parent,
1387 unsigned long hash)
1388{
1389 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1390 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1391 return dentry_hashtable + (hash & D_HASHMASK);
1392}
1393
4ea3ada2
CH
1394/**
1395 * d_obtain_alias - find or allocate a dentry for a given inode
1396 * @inode: inode to allocate the dentry for
1397 *
1398 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1399 * similar open by handle operations. The returned dentry may be anonymous,
1400 * or may have a full name (if the inode was already in the cache).
1401 *
1402 * When called on a directory inode, we must ensure that the inode only ever
1403 * has one dentry. If a dentry is found, that is returned instead of
1404 * allocating a new one.
1405 *
1406 * On successful return, the reference to the inode has been transferred
44003728
CH
1407 * to the dentry. In case of an error the reference on the inode is released.
1408 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1409 * be passed in and will be the error will be propagate to the return value,
1410 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
4ea3ada2
CH
1411 */
1412struct dentry *d_obtain_alias(struct inode *inode)
1413{
9308a612
CH
1414 static const struct qstr anonstring = { .name = "" };
1415 struct dentry *tmp;
1416 struct dentry *res;
4ea3ada2
CH
1417
1418 if (!inode)
44003728 1419 return ERR_PTR(-ESTALE);
4ea3ada2
CH
1420 if (IS_ERR(inode))
1421 return ERR_CAST(inode);
1422
9308a612
CH
1423 res = d_find_alias(inode);
1424 if (res)
1425 goto out_iput;
1426
1427 tmp = d_alloc(NULL, &anonstring);
1428 if (!tmp) {
1429 res = ERR_PTR(-ENOMEM);
1430 goto out_iput;
4ea3ada2 1431 }
9308a612
CH
1432 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1433
1434 spin_lock(&dcache_lock);
1435 res = __d_find_alias(inode, 0);
1436 if (res) {
1437 spin_unlock(&dcache_lock);
1438 dput(tmp);
1439 goto out_iput;
1440 }
1441
1442 /* attach a disconnected dentry */
1443 spin_lock(&tmp->d_lock);
1444 tmp->d_sb = inode->i_sb;
1445 tmp->d_inode = inode;
1446 tmp->d_flags |= DCACHE_DISCONNECTED;
1447 tmp->d_flags &= ~DCACHE_UNHASHED;
1448 list_add(&tmp->d_alias, &inode->i_dentry);
789680d1 1449 spin_lock(&dcache_hash_lock);
9308a612 1450 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
789680d1 1451 spin_unlock(&dcache_hash_lock);
9308a612
CH
1452 spin_unlock(&tmp->d_lock);
1453
1454 spin_unlock(&dcache_lock);
1455 return tmp;
1456
1457 out_iput:
1458 iput(inode);
1459 return res;
4ea3ada2 1460}
adc48720 1461EXPORT_SYMBOL(d_obtain_alias);
1da177e4
LT
1462
1463/**
1464 * d_splice_alias - splice a disconnected dentry into the tree if one exists
1465 * @inode: the inode which may have a disconnected dentry
1466 * @dentry: a negative dentry which we want to point to the inode.
1467 *
1468 * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
1469 * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
1470 * and return it, else simply d_add the inode to the dentry and return NULL.
1471 *
1472 * This is needed in the lookup routine of any filesystem that is exportable
1473 * (via knfsd) so that we can build dcache paths to directories effectively.
1474 *
1475 * If a dentry was found and moved, then it is returned. Otherwise NULL
1476 * is returned. This matches the expected return value of ->lookup.
1477 *
1478 */
1479struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1480{
1481 struct dentry *new = NULL;
1482
21c0d8fd 1483 if (inode && S_ISDIR(inode->i_mode)) {
1da177e4
LT
1484 spin_lock(&dcache_lock);
1485 new = __d_find_alias(inode, 1);
1486 if (new) {
1487 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1488 spin_unlock(&dcache_lock);
1489 security_d_instantiate(new, inode);
1da177e4
LT
1490 d_move(new, dentry);
1491 iput(inode);
1492 } else {
360da900
OH
1493 /* already taking dcache_lock, so d_add() by hand */
1494 __d_instantiate(dentry, inode);
1da177e4
LT
1495 spin_unlock(&dcache_lock);
1496 security_d_instantiate(dentry, inode);
1497 d_rehash(dentry);
1498 }
1499 } else
1500 d_add(dentry, inode);
1501 return new;
1502}
ec4f8605 1503EXPORT_SYMBOL(d_splice_alias);
1da177e4 1504
9403540c
BN
1505/**
1506 * d_add_ci - lookup or allocate new dentry with case-exact name
1507 * @inode: the inode case-insensitive lookup has found
1508 * @dentry: the negative dentry that was passed to the parent's lookup func
1509 * @name: the case-exact name to be associated with the returned dentry
1510 *
1511 * This is to avoid filling the dcache with case-insensitive names to the
1512 * same inode, only the actual correct case is stored in the dcache for
1513 * case-insensitive filesystems.
1514 *
1515 * For a case-insensitive lookup match and if the the case-exact dentry
1516 * already exists in in the dcache, use it and return it.
1517 *
1518 * If no entry exists with the exact case name, allocate new dentry with
1519 * the exact case, and return the spliced entry.
1520 */
e45b590b 1521struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
9403540c
BN
1522 struct qstr *name)
1523{
1524 int error;
1525 struct dentry *found;
1526 struct dentry *new;
1527
b6520c81
CH
1528 /*
1529 * First check if a dentry matching the name already exists,
1530 * if not go ahead and create it now.
1531 */
9403540c 1532 found = d_hash_and_lookup(dentry->d_parent, name);
9403540c
BN
1533 if (!found) {
1534 new = d_alloc(dentry->d_parent, name);
1535 if (!new) {
1536 error = -ENOMEM;
1537 goto err_out;
1538 }
b6520c81 1539
9403540c
BN
1540 found = d_splice_alias(inode, new);
1541 if (found) {
1542 dput(new);
1543 return found;
1544 }
1545 return new;
1546 }
b6520c81
CH
1547
1548 /*
1549 * If a matching dentry exists, and it's not negative use it.
1550 *
1551 * Decrement the reference count to balance the iget() done
1552 * earlier on.
1553 */
9403540c
BN
1554 if (found->d_inode) {
1555 if (unlikely(found->d_inode != inode)) {
1556 /* This can't happen because bad inodes are unhashed. */
1557 BUG_ON(!is_bad_inode(inode));
1558 BUG_ON(!is_bad_inode(found->d_inode));
1559 }
9403540c
BN
1560 iput(inode);
1561 return found;
1562 }
b6520c81 1563
9403540c
BN
1564 /*
1565 * Negative dentry: instantiate it unless the inode is a directory and
b6520c81 1566 * already has a dentry.
9403540c 1567 */
9403540c 1568 spin_lock(&dcache_lock);
b6520c81 1569 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
360da900 1570 __d_instantiate(found, inode);
9403540c
BN
1571 spin_unlock(&dcache_lock);
1572 security_d_instantiate(found, inode);
1573 return found;
1574 }
b6520c81 1575
9403540c 1576 /*
b6520c81
CH
1577 * In case a directory already has a (disconnected) entry grab a
1578 * reference to it, move it in place and use it.
9403540c
BN
1579 */
1580 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1581 dget_locked(new);
1582 spin_unlock(&dcache_lock);
9403540c 1583 security_d_instantiate(found, inode);
9403540c 1584 d_move(new, found);
9403540c 1585 iput(inode);
9403540c 1586 dput(found);
9403540c
BN
1587 return new;
1588
1589err_out:
1590 iput(inode);
1591 return ERR_PTR(error);
1592}
ec4f8605 1593EXPORT_SYMBOL(d_add_ci);
1da177e4
LT
1594
1595/**
1596 * d_lookup - search for a dentry
1597 * @parent: parent dentry
1598 * @name: qstr of name we wish to find
b04f784e 1599 * Returns: dentry, or NULL
1da177e4 1600 *
b04f784e
NP
1601 * d_lookup searches the children of the parent dentry for the name in
1602 * question. If the dentry is found its reference count is incremented and the
1603 * dentry is returned. The caller must use dput to free the entry when it has
1604 * finished using it. %NULL is returned if the dentry does not exist.
1da177e4 1605 */
1da177e4
LT
1606struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1607{
1608 struct dentry * dentry = NULL;
1609 unsigned long seq;
1610
1611 do {
1612 seq = read_seqbegin(&rename_lock);
1613 dentry = __d_lookup(parent, name);
1614 if (dentry)
1615 break;
1616 } while (read_seqretry(&rename_lock, seq));
1617 return dentry;
1618}
ec4f8605 1619EXPORT_SYMBOL(d_lookup);
1da177e4 1620
b04f784e
NP
1621/*
1622 * __d_lookup - search for a dentry (racy)
1623 * @parent: parent dentry
1624 * @name: qstr of name we wish to find
1625 * Returns: dentry, or NULL
1626 *
1627 * __d_lookup is like d_lookup, however it may (rarely) return a
1628 * false-negative result due to unrelated rename activity.
1629 *
1630 * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
1631 * however it must be used carefully, eg. with a following d_lookup in
1632 * the case of failure.
1633 *
1634 * __d_lookup callers must be commented.
1635 */
1da177e4
LT
1636struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1637{
1638 unsigned int len = name->len;
1639 unsigned int hash = name->hash;
1640 const unsigned char *str = name->name;
1641 struct hlist_head *head = d_hash(parent,hash);
1642 struct dentry *found = NULL;
1643 struct hlist_node *node;
665a7583 1644 struct dentry *dentry;
1da177e4 1645
b04f784e
NP
1646 /*
1647 * The hash list is protected using RCU.
1648 *
1649 * Take d_lock when comparing a candidate dentry, to avoid races
1650 * with d_move().
1651 *
1652 * It is possible that concurrent renames can mess up our list
1653 * walk here and result in missing our dentry, resulting in the
1654 * false-negative result. d_lookup() protects against concurrent
1655 * renames using rename_lock seqlock.
1656 *
1657 * See Documentation/vfs/dcache-locking.txt for more details.
1658 */
1da177e4
LT
1659 rcu_read_lock();
1660
665a7583 1661 hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
1da177e4
LT
1662 struct qstr *qstr;
1663
1da177e4
LT
1664 if (dentry->d_name.hash != hash)
1665 continue;
1666 if (dentry->d_parent != parent)
1667 continue;
1668
1669 spin_lock(&dentry->d_lock);
1670
1671 /*
1672 * Recheck the dentry after taking the lock - d_move may have
b04f784e
NP
1673 * changed things. Don't bother checking the hash because
1674 * we're about to compare the whole name anyway.
1da177e4
LT
1675 */
1676 if (dentry->d_parent != parent)
1677 goto next;
1678
d0185c08
LT
1679 /* non-existing due to RCU? */
1680 if (d_unhashed(dentry))
1681 goto next;
1682
1da177e4
LT
1683 /*
1684 * It is safe to compare names since d_move() cannot
1685 * change the qstr (protected by d_lock).
1686 */
1687 qstr = &dentry->d_name;
1688 if (parent->d_op && parent->d_op->d_compare) {
621e155a
NP
1689 if (parent->d_op->d_compare(parent, parent->d_inode,
1690 dentry, dentry->d_inode,
1691 qstr->len, qstr->name, name))
1da177e4
LT
1692 goto next;
1693 } else {
1694 if (qstr->len != len)
1695 goto next;
1696 if (memcmp(qstr->name, str, len))
1697 goto next;
1698 }
1699
b7ab39f6 1700 dentry->d_count++;
d0185c08 1701 found = dentry;
1da177e4
LT
1702 spin_unlock(&dentry->d_lock);
1703 break;
1704next:
1705 spin_unlock(&dentry->d_lock);
1706 }
1707 rcu_read_unlock();
1708
1709 return found;
1710}
1711
3e7e241f
EB
1712/**
1713 * d_hash_and_lookup - hash the qstr then search for a dentry
1714 * @dir: Directory to search in
1715 * @name: qstr of name we wish to find
1716 *
1717 * On hash failure or on lookup failure NULL is returned.
1718 */
1719struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1720{
1721 struct dentry *dentry = NULL;
1722
1723 /*
1724 * Check for a fs-specific hash function. Note that we must
1725 * calculate the standard hash first, as the d_op->d_hash()
1726 * routine may choose to leave the hash value unchanged.
1727 */
1728 name->hash = full_name_hash(name->name, name->len);
1729 if (dir->d_op && dir->d_op->d_hash) {
b1e6a015 1730 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
3e7e241f
EB
1731 goto out;
1732 }
1733 dentry = d_lookup(dir, name);
1734out:
1735 return dentry;
1736}
1737
1da177e4 1738/**
786a5e15 1739 * d_validate - verify dentry provided from insecure source (deprecated)
1da177e4
LT
1740 * @dentry: The dentry alleged to be valid child of @dparent
1741 * @dparent: The parent dentry (known to be valid)
1da177e4
LT
1742 *
1743 * An insecure source has sent us a dentry, here we verify it and dget() it.
1744 * This is used by ncpfs in its readdir implementation.
1745 * Zero is returned in the dentry is invalid.
786a5e15
NP
1746 *
1747 * This function is slow for big directories, and deprecated, do not use it.
1da177e4 1748 */
d3a23e16 1749int d_validate(struct dentry *dentry, struct dentry *dparent)
1da177e4 1750{
786a5e15 1751 struct dentry *child;
d3a23e16
NP
1752
1753 spin_lock(&dcache_lock);
2fd6b7f5 1754 spin_lock(&dparent->d_lock);
786a5e15
NP
1755 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1756 if (dentry == child) {
2fd6b7f5
NP
1757 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1758 __dget_locked_dlock(dentry);
1759 spin_unlock(&dentry->d_lock);
1760 spin_unlock(&dparent->d_lock);
d3a23e16 1761 spin_unlock(&dcache_lock);
1da177e4
LT
1762 return 1;
1763 }
1764 }
2fd6b7f5 1765 spin_unlock(&dparent->d_lock);
d3a23e16 1766 spin_unlock(&dcache_lock);
786a5e15 1767
1da177e4
LT
1768 return 0;
1769}
ec4f8605 1770EXPORT_SYMBOL(d_validate);
1da177e4
LT
1771
1772/*
1773 * When a file is deleted, we have two options:
1774 * - turn this dentry into a negative dentry
1775 * - unhash this dentry and free it.
1776 *
1777 * Usually, we want to just turn this into
1778 * a negative dentry, but if anybody else is
1779 * currently using the dentry or the inode
1780 * we can't do that and we fall back on removing
1781 * it from the hash queues and waiting for
1782 * it to be deleted later when it has no users
1783 */
1784
1785/**
1786 * d_delete - delete a dentry
1787 * @dentry: The dentry to delete
1788 *
1789 * Turn the dentry into a negative dentry if possible, otherwise
1790 * remove it from the hash queues so it can be deleted later
1791 */
1792
1793void d_delete(struct dentry * dentry)
1794{
7a91bf7f 1795 int isdir = 0;
1da177e4
LT
1796 /*
1797 * Are we the only user?
1798 */
1799 spin_lock(&dcache_lock);
1800 spin_lock(&dentry->d_lock);
7a91bf7f 1801 isdir = S_ISDIR(dentry->d_inode->i_mode);
b7ab39f6 1802 if (dentry->d_count == 1) {
13e3c5e5 1803 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1da177e4 1804 dentry_iput(dentry);
7a91bf7f 1805 fsnotify_nameremove(dentry, isdir);
1da177e4
LT
1806 return;
1807 }
1808
1809 if (!d_unhashed(dentry))
1810 __d_drop(dentry);
1811
1812 spin_unlock(&dentry->d_lock);
1813 spin_unlock(&dcache_lock);
7a91bf7f
JM
1814
1815 fsnotify_nameremove(dentry, isdir);
1da177e4 1816}
ec4f8605 1817EXPORT_SYMBOL(d_delete);
1da177e4
LT
1818
1819static void __d_rehash(struct dentry * entry, struct hlist_head *list)
1820{
1821
1822 entry->d_flags &= ~DCACHE_UNHASHED;
1823 hlist_add_head_rcu(&entry->d_hash, list);
1824}
1825
770bfad8
DH
1826static void _d_rehash(struct dentry * entry)
1827{
1828 __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
1829}
1830
1da177e4
LT
1831/**
1832 * d_rehash - add an entry back to the hash
1833 * @entry: dentry to add to the hash
1834 *
1835 * Adds a dentry to the hash according to its name.
1836 */
1837
1838void d_rehash(struct dentry * entry)
1839{
1da177e4
LT
1840 spin_lock(&dcache_lock);
1841 spin_lock(&entry->d_lock);
789680d1 1842 spin_lock(&dcache_hash_lock);
770bfad8 1843 _d_rehash(entry);
789680d1 1844 spin_unlock(&dcache_hash_lock);
1da177e4
LT
1845 spin_unlock(&entry->d_lock);
1846 spin_unlock(&dcache_lock);
1847}
ec4f8605 1848EXPORT_SYMBOL(d_rehash);
1da177e4 1849
fb2d5b86
NP
1850/**
1851 * dentry_update_name_case - update case insensitive dentry with a new name
1852 * @dentry: dentry to be updated
1853 * @name: new name
1854 *
1855 * Update a case insensitive dentry with new case of name.
1856 *
1857 * dentry must have been returned by d_lookup with name @name. Old and new
1858 * name lengths must match (ie. no d_compare which allows mismatched name
1859 * lengths).
1860 *
1861 * Parent inode i_mutex must be held over d_lookup and into this call (to
1862 * keep renames and concurrent inserts, and readdir(2) away).
1863 */
1864void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
1865{
1866 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
1867 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
1868
1869 spin_lock(&dcache_lock);
1870 spin_lock(&dentry->d_lock);
1871 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
1872 spin_unlock(&dentry->d_lock);
1873 spin_unlock(&dcache_lock);
1874}
1875EXPORT_SYMBOL(dentry_update_name_case);
1876
1da177e4
LT
1877static void switch_names(struct dentry *dentry, struct dentry *target)
1878{
1879 if (dname_external(target)) {
1880 if (dname_external(dentry)) {
1881 /*
1882 * Both external: swap the pointers
1883 */
9a8d5bb4 1884 swap(target->d_name.name, dentry->d_name.name);
1da177e4
LT
1885 } else {
1886 /*
1887 * dentry:internal, target:external. Steal target's
1888 * storage and make target internal.
1889 */
321bcf92
BF
1890 memcpy(target->d_iname, dentry->d_name.name,
1891 dentry->d_name.len + 1);
1da177e4
LT
1892 dentry->d_name.name = target->d_name.name;
1893 target->d_name.name = target->d_iname;
1894 }
1895 } else {
1896 if (dname_external(dentry)) {
1897 /*
1898 * dentry:external, target:internal. Give dentry's
1899 * storage to target and make dentry internal
1900 */
1901 memcpy(dentry->d_iname, target->d_name.name,
1902 target->d_name.len + 1);
1903 target->d_name.name = dentry->d_name.name;
1904 dentry->d_name.name = dentry->d_iname;
1905 } else {
1906 /*
1907 * Both are internal. Just copy target to dentry
1908 */
1909 memcpy(dentry->d_iname, target->d_name.name,
1910 target->d_name.len + 1);
dc711ca3
AV
1911 dentry->d_name.len = target->d_name.len;
1912 return;
1da177e4
LT
1913 }
1914 }
9a8d5bb4 1915 swap(dentry->d_name.len, target->d_name.len);
1da177e4
LT
1916}
1917
2fd6b7f5
NP
1918static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
1919{
1920 /*
1921 * XXXX: do we really need to take target->d_lock?
1922 */
1923 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
1924 spin_lock(&target->d_parent->d_lock);
1925 else {
1926 if (d_ancestor(dentry->d_parent, target->d_parent)) {
1927 spin_lock(&dentry->d_parent->d_lock);
1928 spin_lock_nested(&target->d_parent->d_lock,
1929 DENTRY_D_LOCK_NESTED);
1930 } else {
1931 spin_lock(&target->d_parent->d_lock);
1932 spin_lock_nested(&dentry->d_parent->d_lock,
1933 DENTRY_D_LOCK_NESTED);
1934 }
1935 }
1936 if (target < dentry) {
1937 spin_lock_nested(&target->d_lock, 2);
1938 spin_lock_nested(&dentry->d_lock, 3);
1939 } else {
1940 spin_lock_nested(&dentry->d_lock, 2);
1941 spin_lock_nested(&target->d_lock, 3);
1942 }
1943}
1944
1945static void dentry_unlock_parents_for_move(struct dentry *dentry,
1946 struct dentry *target)
1947{
1948 if (target->d_parent != dentry->d_parent)
1949 spin_unlock(&dentry->d_parent->d_lock);
1950 if (target->d_parent != target)
1951 spin_unlock(&target->d_parent->d_lock);
1952}
1953
1da177e4 1954/*
2fd6b7f5
NP
1955 * When switching names, the actual string doesn't strictly have to
1956 * be preserved in the target - because we're dropping the target
1957 * anyway. As such, we can just do a simple memcpy() to copy over
1958 * the new name before we switch.
1959 *
1960 * Note that we have to be a lot more careful about getting the hash
1961 * switched - we have to switch the hash value properly even if it
1962 * then no longer matches the actual (corrupted) string of the target.
1963 * The hash value has to match the hash queue that the dentry is on..
1da177e4 1964 */
9eaef27b
TM
1965/*
1966 * d_move_locked - move a dentry
1da177e4
LT
1967 * @dentry: entry to move
1968 * @target: new dentry
1969 *
1970 * Update the dcache to reflect the move of a file name. Negative
1971 * dcache entries should not be moved in this way.
1972 */
9eaef27b 1973static void d_move_locked(struct dentry * dentry, struct dentry * target)
1da177e4 1974{
1da177e4
LT
1975 if (!dentry->d_inode)
1976 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1977
2fd6b7f5
NP
1978 BUG_ON(d_ancestor(dentry, target));
1979 BUG_ON(d_ancestor(target, dentry));
1980
1da177e4 1981 write_seqlock(&rename_lock);
2fd6b7f5
NP
1982
1983 dentry_lock_for_move(dentry, target);
1da177e4
LT
1984
1985 /* Move the dentry to the target hash queue, if on different bucket */
789680d1
NP
1986 spin_lock(&dcache_hash_lock);
1987 if (!d_unhashed(dentry))
1988 hlist_del_rcu(&dentry->d_hash);
1989 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1990 spin_unlock(&dcache_hash_lock);
1da177e4
LT
1991
1992 /* Unhash the target: dput() will then get rid of it */
1993 __d_drop(target);
1994
5160ee6f
ED
1995 list_del(&dentry->d_u.d_child);
1996 list_del(&target->d_u.d_child);
1da177e4
LT
1997
1998 /* Switch the names.. */
1999 switch_names(dentry, target);
9a8d5bb4 2000 swap(dentry->d_name.hash, target->d_name.hash);
1da177e4
LT
2001
2002 /* ... and switch the parents */
2003 if (IS_ROOT(dentry)) {
2004 dentry->d_parent = target->d_parent;
2005 target->d_parent = target;
5160ee6f 2006 INIT_LIST_HEAD(&target->d_u.d_child);
1da177e4 2007 } else {
9a8d5bb4 2008 swap(dentry->d_parent, target->d_parent);
1da177e4
LT
2009
2010 /* And add them back to the (new) parent lists */
5160ee6f 2011 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
1da177e4
LT
2012 }
2013
5160ee6f 2014 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2fd6b7f5
NP
2015
2016 dentry_unlock_parents_for_move(dentry, target);
1da177e4 2017 spin_unlock(&target->d_lock);
c32ccd87 2018 fsnotify_d_move(dentry);
1da177e4
LT
2019 spin_unlock(&dentry->d_lock);
2020 write_sequnlock(&rename_lock);
9eaef27b
TM
2021}
2022
2023/**
2024 * d_move - move a dentry
2025 * @dentry: entry to move
2026 * @target: new dentry
2027 *
2028 * Update the dcache to reflect the move of a file name. Negative
2029 * dcache entries should not be moved in this way.
2030 */
2031
2032void d_move(struct dentry * dentry, struct dentry * target)
2033{
2034 spin_lock(&dcache_lock);
2035 d_move_locked(dentry, target);
1da177e4
LT
2036 spin_unlock(&dcache_lock);
2037}
ec4f8605 2038EXPORT_SYMBOL(d_move);
1da177e4 2039
e2761a11
OH
2040/**
2041 * d_ancestor - search for an ancestor
2042 * @p1: ancestor dentry
2043 * @p2: child dentry
2044 *
2045 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2046 * an ancestor of p2, else NULL.
9eaef27b 2047 */
e2761a11 2048struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
9eaef27b
TM
2049{
2050 struct dentry *p;
2051
871c0067 2052 for (p = p2; !IS_ROOT(p); p = p->d_parent) {
9eaef27b 2053 if (p->d_parent == p1)
e2761a11 2054 return p;
9eaef27b 2055 }
e2761a11 2056 return NULL;
9eaef27b
TM
2057}
2058
2059/*
2060 * This helper attempts to cope with remotely renamed directories
2061 *
2062 * It assumes that the caller is already holding
2063 * dentry->d_parent->d_inode->i_mutex and the dcache_lock
2064 *
2065 * Note: If ever the locking in lock_rename() changes, then please
2066 * remember to update this too...
9eaef27b
TM
2067 */
2068static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
31f3e0b3 2069 __releases(dcache_lock)
9eaef27b
TM
2070{
2071 struct mutex *m1 = NULL, *m2 = NULL;
2072 struct dentry *ret;
2073
2074 /* If alias and dentry share a parent, then no extra locks required */
2075 if (alias->d_parent == dentry->d_parent)
2076 goto out_unalias;
2077
2078 /* Check for loops */
2079 ret = ERR_PTR(-ELOOP);
e2761a11 2080 if (d_ancestor(alias, dentry))
9eaef27b
TM
2081 goto out_err;
2082
2083 /* See lock_rename() */
2084 ret = ERR_PTR(-EBUSY);
2085 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2086 goto out_err;
2087 m1 = &dentry->d_sb->s_vfs_rename_mutex;
2088 if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
2089 goto out_err;
2090 m2 = &alias->d_parent->d_inode->i_mutex;
2091out_unalias:
2092 d_move_locked(alias, dentry);
2093 ret = alias;
2094out_err:
2095 spin_unlock(&dcache_lock);
2096 if (m2)
2097 mutex_unlock(m2);
2098 if (m1)
2099 mutex_unlock(m1);
2100 return ret;
2101}
2102
770bfad8
DH
2103/*
2104 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
2105 * named dentry in place of the dentry to be replaced.
2fd6b7f5 2106 * returns with anon->d_lock held!
770bfad8
DH
2107 */
2108static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
2109{
2110 struct dentry *dparent, *aparent;
2111
2fd6b7f5 2112 dentry_lock_for_move(anon, dentry);
770bfad8
DH
2113
2114 dparent = dentry->d_parent;
2115 aparent = anon->d_parent;
2116
2fd6b7f5
NP
2117 switch_names(dentry, anon);
2118 swap(dentry->d_name.hash, anon->d_name.hash);
2119
770bfad8
DH
2120 dentry->d_parent = (aparent == anon) ? dentry : aparent;
2121 list_del(&dentry->d_u.d_child);
2122 if (!IS_ROOT(dentry))
2123 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2124 else
2125 INIT_LIST_HEAD(&dentry->d_u.d_child);
2126
2127 anon->d_parent = (dparent == dentry) ? anon : dparent;
2128 list_del(&anon->d_u.d_child);
2129 if (!IS_ROOT(anon))
2130 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
2131 else
2132 INIT_LIST_HEAD(&anon->d_u.d_child);
2133
2fd6b7f5
NP
2134 dentry_unlock_parents_for_move(anon, dentry);
2135 spin_unlock(&dentry->d_lock);
2136
2137 /* anon->d_lock still locked, returns locked */
770bfad8
DH
2138 anon->d_flags &= ~DCACHE_DISCONNECTED;
2139}
2140
2141/**
2142 * d_materialise_unique - introduce an inode into the tree
2143 * @dentry: candidate dentry
2144 * @inode: inode to bind to the dentry, to which aliases may be attached
2145 *
2146 * Introduces an dentry into the tree, substituting an extant disconnected
2147 * root directory alias in its place if there is one
2148 */
2149struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2150{
9eaef27b 2151 struct dentry *actual;
770bfad8
DH
2152
2153 BUG_ON(!d_unhashed(dentry));
2154
2155 spin_lock(&dcache_lock);
2156
2157 if (!inode) {
2158 actual = dentry;
360da900 2159 __d_instantiate(dentry, NULL);
770bfad8
DH
2160 goto found_lock;
2161 }
2162
9eaef27b
TM
2163 if (S_ISDIR(inode->i_mode)) {
2164 struct dentry *alias;
2165
2166 /* Does an aliased dentry already exist? */
2167 alias = __d_find_alias(inode, 0);
2168 if (alias) {
2169 actual = alias;
2170 /* Is this an anonymous mountpoint that we could splice
2171 * into our tree? */
2172 if (IS_ROOT(alias)) {
9eaef27b
TM
2173 __d_materialise_dentry(dentry, alias);
2174 __d_drop(alias);
2175 goto found;
2176 }
2177 /* Nope, but we must(!) avoid directory aliasing */
2178 actual = __d_unalias(dentry, alias);
2179 if (IS_ERR(actual))
2180 dput(alias);
2181 goto out_nolock;
2182 }
770bfad8
DH
2183 }
2184
2185 /* Add a unique reference */
2186 actual = __d_instantiate_unique(dentry, inode);
2187 if (!actual)
2188 actual = dentry;
2189 else if (unlikely(!d_unhashed(actual)))
2190 goto shouldnt_be_hashed;
2191
2192found_lock:
2193 spin_lock(&actual->d_lock);
2194found:
789680d1 2195 spin_lock(&dcache_hash_lock);
770bfad8 2196 _d_rehash(actual);
789680d1 2197 spin_unlock(&dcache_hash_lock);
770bfad8
DH
2198 spin_unlock(&actual->d_lock);
2199 spin_unlock(&dcache_lock);
9eaef27b 2200out_nolock:
770bfad8
DH
2201 if (actual == dentry) {
2202 security_d_instantiate(dentry, inode);
2203 return NULL;
2204 }
2205
2206 iput(inode);
2207 return actual;
2208
770bfad8
DH
2209shouldnt_be_hashed:
2210 spin_unlock(&dcache_lock);
2211 BUG();
770bfad8 2212}
ec4f8605 2213EXPORT_SYMBOL_GPL(d_materialise_unique);
770bfad8 2214
cdd16d02 2215static int prepend(char **buffer, int *buflen, const char *str, int namelen)
6092d048
RP
2216{
2217 *buflen -= namelen;
2218 if (*buflen < 0)
2219 return -ENAMETOOLONG;
2220 *buffer -= namelen;
2221 memcpy(*buffer, str, namelen);
2222 return 0;
2223}
2224
cdd16d02
MS
2225static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2226{
2227 return prepend(buffer, buflen, name->name, name->len);
2228}
2229
1da177e4 2230/**
f2eb6575
MS
2231 * Prepend path string to a buffer
2232 *
9d1bc601
MS
2233 * @path: the dentry/vfsmount to report
2234 * @root: root vfsmnt/dentry (may be modified by this function)
f2eb6575
MS
2235 * @buffer: pointer to the end of the buffer
2236 * @buflen: pointer to buffer length
552ce544 2237 *
f2eb6575 2238 * Caller holds the dcache_lock.
9d1bc601
MS
2239 *
2240 * If path is not reachable from the supplied root, then the value of
2241 * root is changed (without modifying refcounts).
1da177e4 2242 */
f2eb6575
MS
2243static int prepend_path(const struct path *path, struct path *root,
2244 char **buffer, int *buflen)
1da177e4 2245{
9d1bc601
MS
2246 struct dentry *dentry = path->dentry;
2247 struct vfsmount *vfsmnt = path->mnt;
f2eb6575
MS
2248 bool slash = false;
2249 int error = 0;
6092d048 2250
99b7db7b 2251 br_read_lock(vfsmount_lock);
f2eb6575 2252 while (dentry != root->dentry || vfsmnt != root->mnt) {
1da177e4
LT
2253 struct dentry * parent;
2254
1da177e4 2255 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
552ce544 2256 /* Global root? */
1da177e4 2257 if (vfsmnt->mnt_parent == vfsmnt) {
1da177e4
LT
2258 goto global_root;
2259 }
2260 dentry = vfsmnt->mnt_mountpoint;
2261 vfsmnt = vfsmnt->mnt_parent;
1da177e4
LT
2262 continue;
2263 }
2264 parent = dentry->d_parent;
2265 prefetch(parent);
f2eb6575
MS
2266 error = prepend_name(buffer, buflen, &dentry->d_name);
2267 if (!error)
2268 error = prepend(buffer, buflen, "/", 1);
2269 if (error)
2270 break;
2271
2272 slash = true;
1da177e4
LT
2273 dentry = parent;
2274 }
2275
be285c71 2276out:
f2eb6575
MS
2277 if (!error && !slash)
2278 error = prepend(buffer, buflen, "/", 1);
2279
99b7db7b 2280 br_read_unlock(vfsmount_lock);
f2eb6575 2281 return error;
1da177e4
LT
2282
2283global_root:
98dc568b
MS
2284 /*
2285 * Filesystems needing to implement special "root names"
2286 * should do so with ->d_dname()
2287 */
2288 if (IS_ROOT(dentry) &&
2289 (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
2290 WARN(1, "Root dentry has weird name <%.*s>\n",
2291 (int) dentry->d_name.len, dentry->d_name.name);
2292 }
9d1bc601
MS
2293 root->mnt = vfsmnt;
2294 root->dentry = dentry;
be285c71 2295 goto out;
f2eb6575 2296}
be285c71 2297
f2eb6575
MS
2298/**
2299 * __d_path - return the path of a dentry
2300 * @path: the dentry/vfsmount to report
2301 * @root: root vfsmnt/dentry (may be modified by this function)
cd956a1c 2302 * @buf: buffer to return value in
f2eb6575
MS
2303 * @buflen: buffer length
2304 *
ffd1f4ed 2305 * Convert a dentry into an ASCII path name.
f2eb6575
MS
2306 *
2307 * Returns a pointer into the buffer or an error code if the
2308 * path was too long.
2309 *
be148247 2310 * "buflen" should be positive.
f2eb6575
MS
2311 *
2312 * If path is not reachable from the supplied root, then the value of
2313 * root is changed (without modifying refcounts).
2314 */
2315char *__d_path(const struct path *path, struct path *root,
2316 char *buf, int buflen)
2317{
2318 char *res = buf + buflen;
2319 int error;
2320
2321 prepend(&res, &buflen, "\0", 1);
be148247 2322 spin_lock(&dcache_lock);
f2eb6575 2323 error = prepend_path(path, root, &res, &buflen);
be148247
CH
2324 spin_unlock(&dcache_lock);
2325
f2eb6575
MS
2326 if (error)
2327 return ERR_PTR(error);
f2eb6575 2328 return res;
1da177e4
LT
2329}
2330
ffd1f4ed
MS
2331/*
2332 * same as __d_path but appends "(deleted)" for unlinked files.
2333 */
2334static int path_with_deleted(const struct path *path, struct path *root,
2335 char **buf, int *buflen)
2336{
2337 prepend(buf, buflen, "\0", 1);
2338 if (d_unlinked(path->dentry)) {
2339 int error = prepend(buf, buflen, " (deleted)", 10);
2340 if (error)
2341 return error;
2342 }
2343
2344 return prepend_path(path, root, buf, buflen);
2345}
2346
8df9d1a4
MS
2347static int prepend_unreachable(char **buffer, int *buflen)
2348{
2349 return prepend(buffer, buflen, "(unreachable)", 13);
2350}
2351
a03a8a70
JB
2352/**
2353 * d_path - return the path of a dentry
cf28b486 2354 * @path: path to report
a03a8a70
JB
2355 * @buf: buffer to return value in
2356 * @buflen: buffer length
2357 *
2358 * Convert a dentry into an ASCII path name. If the entry has been deleted
2359 * the string " (deleted)" is appended. Note that this is ambiguous.
2360 *
52afeefb
AV
2361 * Returns a pointer into the buffer or an error code if the path was
2362 * too long. Note: Callers should use the returned pointer, not the passed
2363 * in buffer, to use the name! The implementation often starts at an offset
2364 * into the buffer, and may leave 0 bytes at the start.
a03a8a70 2365 *
31f3e0b3 2366 * "buflen" should be positive.
a03a8a70 2367 */
20d4fdc1 2368char *d_path(const struct path *path, char *buf, int buflen)
1da177e4 2369{
ffd1f4ed 2370 char *res = buf + buflen;
6ac08c39 2371 struct path root;
9d1bc601 2372 struct path tmp;
ffd1f4ed 2373 int error;
1da177e4 2374
c23fbb6b
ED
2375 /*
2376 * We have various synthetic filesystems that never get mounted. On
2377 * these filesystems dentries are never used for lookup purposes, and
2378 * thus don't need to be hashed. They also don't need a name until a
2379 * user wants to identify the object in /proc/pid/fd/. The little hack
2380 * below allows us to generate a name for these objects on demand:
2381 */
cf28b486
JB
2382 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2383 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
c23fbb6b 2384
f7ad3c6b 2385 get_fs_root(current->fs, &root);
552ce544 2386 spin_lock(&dcache_lock);
9d1bc601 2387 tmp = root;
ffd1f4ed
MS
2388 error = path_with_deleted(path, &tmp, &res, &buflen);
2389 if (error)
2390 res = ERR_PTR(error);
552ce544 2391 spin_unlock(&dcache_lock);
6ac08c39 2392 path_put(&root);
1da177e4
LT
2393 return res;
2394}
ec4f8605 2395EXPORT_SYMBOL(d_path);
1da177e4 2396
8df9d1a4
MS
2397/**
2398 * d_path_with_unreachable - return the path of a dentry
2399 * @path: path to report
2400 * @buf: buffer to return value in
2401 * @buflen: buffer length
2402 *
2403 * The difference from d_path() is that this prepends "(unreachable)"
2404 * to paths which are unreachable from the current process' root.
2405 */
2406char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2407{
2408 char *res = buf + buflen;
2409 struct path root;
2410 struct path tmp;
2411 int error;
2412
2413 if (path->dentry->d_op && path->dentry->d_op->d_dname)
2414 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2415
2416 get_fs_root(current->fs, &root);
2417 spin_lock(&dcache_lock);
2418 tmp = root;
2419 error = path_with_deleted(path, &tmp, &res, &buflen);
2420 if (!error && !path_equal(&tmp, &root))
2421 error = prepend_unreachable(&res, &buflen);
2422 spin_unlock(&dcache_lock);
2423 path_put(&root);
2424 if (error)
2425 res = ERR_PTR(error);
2426
2427 return res;
2428}
2429
c23fbb6b
ED
2430/*
2431 * Helper function for dentry_operations.d_dname() members
2432 */
2433char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2434 const char *fmt, ...)
2435{
2436 va_list args;
2437 char temp[64];
2438 int sz;
2439
2440 va_start(args, fmt);
2441 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
2442 va_end(args);
2443
2444 if (sz > sizeof(temp) || sz > buflen)
2445 return ERR_PTR(-ENAMETOOLONG);
2446
2447 buffer += buflen - sz;
2448 return memcpy(buffer, temp, sz);
2449}
2450
6092d048
RP
2451/*
2452 * Write full pathname from the root of the filesystem into the buffer.
2453 */
ec2447c2 2454static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
6092d048
RP
2455{
2456 char *end = buf + buflen;
2457 char *retval;
2458
6092d048 2459 prepend(&end, &buflen, "\0", 1);
6092d048
RP
2460 if (buflen < 1)
2461 goto Elong;
2462 /* Get '/' right */
2463 retval = end-1;
2464 *retval = '/';
2465
cdd16d02
MS
2466 while (!IS_ROOT(dentry)) {
2467 struct dentry *parent = dentry->d_parent;
6092d048 2468
6092d048 2469 prefetch(parent);
cdd16d02 2470 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
6092d048
RP
2471 (prepend(&end, &buflen, "/", 1) != 0))
2472 goto Elong;
2473
2474 retval = end;
2475 dentry = parent;
2476 }
c103135c
AV
2477 return retval;
2478Elong:
2479 return ERR_PTR(-ENAMETOOLONG);
2480}
ec2447c2
NP
2481
2482char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2483{
2484 char *retval;
2485
2486 spin_lock(&dcache_lock);
2487 retval = __dentry_path(dentry, buf, buflen);
2488 spin_unlock(&dcache_lock);
2489
2490 return retval;
2491}
2492EXPORT_SYMBOL(dentry_path_raw);
c103135c
AV
2493
2494char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2495{
2496 char *p = NULL;
2497 char *retval;
2498
2499 spin_lock(&dcache_lock);
2500 if (d_unlinked(dentry)) {
2501 p = buf + buflen;
2502 if (prepend(&p, &buflen, "//deleted", 10) != 0)
2503 goto Elong;
2504 buflen++;
2505 }
2506 retval = __dentry_path(dentry, buf, buflen);
6092d048 2507 spin_unlock(&dcache_lock);
c103135c
AV
2508 if (!IS_ERR(retval) && p)
2509 *p = '/'; /* restore '/' overriden with '\0' */
6092d048
RP
2510 return retval;
2511Elong:
2512 spin_unlock(&dcache_lock);
2513 return ERR_PTR(-ENAMETOOLONG);
2514}
2515
1da177e4
LT
2516/*
2517 * NOTE! The user-level library version returns a
2518 * character pointer. The kernel system call just
2519 * returns the length of the buffer filled (which
2520 * includes the ending '\0' character), or a negative
2521 * error value. So libc would do something like
2522 *
2523 * char *getcwd(char * buf, size_t size)
2524 * {
2525 * int retval;
2526 *
2527 * retval = sys_getcwd(buf, size);
2528 * if (retval >= 0)
2529 * return buf;
2530 * errno = -retval;
2531 * return NULL;
2532 * }
2533 */
3cdad428 2534SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
1da177e4 2535{
552ce544 2536 int error;
6ac08c39 2537 struct path pwd, root;
552ce544 2538 char *page = (char *) __get_free_page(GFP_USER);
1da177e4
LT
2539
2540 if (!page)
2541 return -ENOMEM;
2542
f7ad3c6b 2543 get_fs_root_and_pwd(current->fs, &root, &pwd);
1da177e4 2544
552ce544 2545 error = -ENOENT;
552ce544 2546 spin_lock(&dcache_lock);
f3da392e 2547 if (!d_unlinked(pwd.dentry)) {
552ce544 2548 unsigned long len;
9d1bc601 2549 struct path tmp = root;
8df9d1a4
MS
2550 char *cwd = page + PAGE_SIZE;
2551 int buflen = PAGE_SIZE;
1da177e4 2552
8df9d1a4
MS
2553 prepend(&cwd, &buflen, "\0", 1);
2554 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
552ce544
LT
2555 spin_unlock(&dcache_lock);
2556
8df9d1a4 2557 if (error)
552ce544
LT
2558 goto out;
2559
8df9d1a4
MS
2560 /* Unreachable from current root */
2561 if (!path_equal(&tmp, &root)) {
2562 error = prepend_unreachable(&cwd, &buflen);
2563 if (error)
2564 goto out;
2565 }
2566
552ce544
LT
2567 error = -ERANGE;
2568 len = PAGE_SIZE + page - cwd;
2569 if (len <= size) {
2570 error = len;
2571 if (copy_to_user(buf, cwd, len))
2572 error = -EFAULT;
2573 }
2574 } else
2575 spin_unlock(&dcache_lock);
1da177e4
LT
2576
2577out:
6ac08c39
JB
2578 path_put(&pwd);
2579 path_put(&root);
1da177e4
LT
2580 free_page((unsigned long) page);
2581 return error;
2582}
2583
2584/*
2585 * Test whether new_dentry is a subdirectory of old_dentry.
2586 *
2587 * Trivially implemented using the dcache structure
2588 */
2589
2590/**
2591 * is_subdir - is new dentry a subdirectory of old_dentry
2592 * @new_dentry: new dentry
2593 * @old_dentry: old dentry
2594 *
2595 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
2596 * Returns 0 otherwise.
2597 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2598 */
2599
e2761a11 2600int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
1da177e4
LT
2601{
2602 int result;
1da177e4
LT
2603 unsigned long seq;
2604
e2761a11
OH
2605 if (new_dentry == old_dentry)
2606 return 1;
2607
2608 /*
2609 * Need rcu_readlock to protect against the d_parent trashing
2610 * due to d_move
1da177e4
LT
2611 */
2612 rcu_read_lock();
e2761a11 2613 do {
1da177e4 2614 /* for restarting inner loop in case of seq retry */
1da177e4 2615 seq = read_seqbegin(&rename_lock);
e2761a11 2616 if (d_ancestor(old_dentry, new_dentry))
1da177e4 2617 result = 1;
e2761a11
OH
2618 else
2619 result = 0;
1da177e4
LT
2620 } while (read_seqretry(&rename_lock, seq));
2621 rcu_read_unlock();
2622
2623 return result;
2624}
2625
2096f759
AV
2626int path_is_under(struct path *path1, struct path *path2)
2627{
2628 struct vfsmount *mnt = path1->mnt;
2629 struct dentry *dentry = path1->dentry;
2630 int res;
99b7db7b
NP
2631
2632 br_read_lock(vfsmount_lock);
2096f759
AV
2633 if (mnt != path2->mnt) {
2634 for (;;) {
2635 if (mnt->mnt_parent == mnt) {
99b7db7b 2636 br_read_unlock(vfsmount_lock);
2096f759
AV
2637 return 0;
2638 }
2639 if (mnt->mnt_parent == path2->mnt)
2640 break;
2641 mnt = mnt->mnt_parent;
2642 }
2643 dentry = mnt->mnt_mountpoint;
2644 }
2645 res = is_subdir(dentry, path2->dentry);
99b7db7b 2646 br_read_unlock(vfsmount_lock);
2096f759
AV
2647 return res;
2648}
2649EXPORT_SYMBOL(path_is_under);
2650
1da177e4
LT
2651void d_genocide(struct dentry *root)
2652{
2653 struct dentry *this_parent = root;
2654 struct list_head *next;
2655
2656 spin_lock(&dcache_lock);
2fd6b7f5 2657 spin_lock(&this_parent->d_lock);
1da177e4
LT
2658repeat:
2659 next = this_parent->d_subdirs.next;
2660resume:
2661 while (next != &this_parent->d_subdirs) {
2662 struct list_head *tmp = next;
5160ee6f 2663 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
1da177e4 2664 next = tmp->next;
da502956
NP
2665 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2666 if (d_unhashed(dentry) || !dentry->d_inode) {
2667 spin_unlock(&dentry->d_lock);
1da177e4 2668 continue;
da502956 2669 }
1da177e4 2670 if (!list_empty(&dentry->d_subdirs)) {
2fd6b7f5
NP
2671 spin_unlock(&this_parent->d_lock);
2672 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1da177e4 2673 this_parent = dentry;
2fd6b7f5 2674 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1da177e4
LT
2675 goto repeat;
2676 }
b7ab39f6
NP
2677 dentry->d_count--;
2678 spin_unlock(&dentry->d_lock);
1da177e4
LT
2679 }
2680 if (this_parent != root) {
5160ee6f 2681 next = this_parent->d_u.d_child.next;
b7ab39f6
NP
2682 this_parent->d_count--;
2683 spin_unlock(&this_parent->d_lock);
1da177e4 2684 this_parent = this_parent->d_parent;
2fd6b7f5 2685 spin_lock(&this_parent->d_lock);
1da177e4
LT
2686 goto resume;
2687 }
2fd6b7f5 2688 spin_unlock(&this_parent->d_lock);
1da177e4
LT
2689 spin_unlock(&dcache_lock);
2690}
2691
2692/**
2693 * find_inode_number - check for dentry with name
2694 * @dir: directory to check
2695 * @name: Name to find.
2696 *
2697 * Check whether a dentry already exists for the given name,
2698 * and return the inode number if it has an inode. Otherwise
2699 * 0 is returned.
2700 *
2701 * This routine is used to post-process directory listings for
2702 * filesystems using synthetic inode numbers, and is necessary
2703 * to keep getcwd() working.
2704 */
2705
2706ino_t find_inode_number(struct dentry *dir, struct qstr *name)
2707{
2708 struct dentry * dentry;
2709 ino_t ino = 0;
2710
3e7e241f
EB
2711 dentry = d_hash_and_lookup(dir, name);
2712 if (dentry) {
1da177e4
LT
2713 if (dentry->d_inode)
2714 ino = dentry->d_inode->i_ino;
2715 dput(dentry);
2716 }
1da177e4
LT
2717 return ino;
2718}
ec4f8605 2719EXPORT_SYMBOL(find_inode_number);
1da177e4
LT
2720
2721static __initdata unsigned long dhash_entries;
2722static int __init set_dhash_entries(char *str)
2723{
2724 if (!str)
2725 return 0;
2726 dhash_entries = simple_strtoul(str, &str, 0);
2727 return 1;
2728}
2729__setup("dhash_entries=", set_dhash_entries);
2730
2731static void __init dcache_init_early(void)
2732{
2733 int loop;
2734
2735 /* If hashes are distributed across NUMA nodes, defer
2736 * hash allocation until vmalloc space is available.
2737 */
2738 if (hashdist)
2739 return;
2740
2741 dentry_hashtable =
2742 alloc_large_system_hash("Dentry cache",
2743 sizeof(struct hlist_head),
2744 dhash_entries,
2745 13,
2746 HASH_EARLY,
2747 &d_hash_shift,
2748 &d_hash_mask,
2749 0);
2750
2751 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2752 INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2753}
2754
74bf17cf 2755static void __init dcache_init(void)
1da177e4
LT
2756{
2757 int loop;
2758
2759 /*
2760 * A constructor could be added for stable state like the lists,
2761 * but it is probably not worth it because of the cache nature
2762 * of the dcache.
2763 */
0a31bd5f
CL
2764 dentry_cache = KMEM_CACHE(dentry,
2765 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
1da177e4 2766
8e1f936b 2767 register_shrinker(&dcache_shrinker);
1da177e4
LT
2768
2769 /* Hash may have been set up in dcache_init_early */
2770 if (!hashdist)
2771 return;
2772
2773 dentry_hashtable =
2774 alloc_large_system_hash("Dentry cache",
2775 sizeof(struct hlist_head),
2776 dhash_entries,
2777 13,
2778 0,
2779 &d_hash_shift,
2780 &d_hash_mask,
2781 0);
2782
2783 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2784 INIT_HLIST_HEAD(&dentry_hashtable[loop]);
2785}
2786
2787/* SLAB cache for __getname() consumers */
e18b890b 2788struct kmem_cache *names_cachep __read_mostly;
ec4f8605 2789EXPORT_SYMBOL(names_cachep);
1da177e4 2790
1da177e4
LT
2791EXPORT_SYMBOL(d_genocide);
2792
1da177e4
LT
2793void __init vfs_caches_init_early(void)
2794{
2795 dcache_init_early();
2796 inode_init_early();
2797}
2798
2799void __init vfs_caches_init(unsigned long mempages)
2800{
2801 unsigned long reserve;
2802
2803 /* Base hash sizes on available memory, with a reserve equal to
2804 150% of current kernel size */
2805
2806 reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
2807 mempages -= reserve;
2808
2809 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
20c2df83 2810 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1da177e4 2811
74bf17cf
DC
2812 dcache_init();
2813 inode_init();
1da177e4 2814 files_init(mempages);
74bf17cf 2815 mnt_init();
1da177e4
LT
2816 bdev_cache_init();
2817 chrdev_init();
2818}