1 /*
2  * fs/dcache.c
3  *
4  * Complete reimplementation
5  * (C) 1997 Thomas Schoebel-Theuer,
6  * with heavy changes by Linus Torvalds
7  */
8
9 /*
10  * Notes on the allocation strategy:
11  *
12  * The dcache is a master of the icache - whenever a dcache entry
13  * exists, the inode will always exist. "iput()" is done either when
14  * the dcache entry is deleted or garbage collected.
15  */
16
17 #include <linux/ratelimit.h>
18 #include <linux/string.h>
19 #include <linux/mm.h>
20 #include <linux/fs.h>
21 #include <linux/fscrypt.h>
22 #include <linux/fsnotify.h>
23 #include <linux/slab.h>
24 #include <linux/init.h>
25 #include <linux/hash.h>
26 #include <linux/cache.h>
27 #include <linux/export.h>
28 #include <linux/security.h>
29 #include <linux/seqlock.h>
30 #include <linux/memblock.h>
31 #include <linux/bit_spinlock.h>
32 #include <linux/rculist_bl.h>
33 #include <linux/list_lru.h>
34 #include "internal.h"
35 #include "mount.h"
36
37 /*
38  * Usage:
39  * dcache->d_inode->i_lock protects:
40  *   - i_dentry, d_u.d_alias, d_inode of aliases
41  * dcache_hash_bucket lock protects:
42  *   - the dcache hash table
43  * s_roots bl list spinlock protects:
44  *   - the s_roots list (see __d_drop)
45  * dentry->d_sb->s_dentry_lru_lock protects:
46  *   - the dcache lru lists and counters
47  * d_lock protects:
48  *   - d_flags
49  *   - d_name
50  *   - d_lru
51  *   - d_count
52  *   - d_unhashed()
53  *   - d_parent and d_subdirs
54  *   - children's d_child and d_parent
55  *   - d_u.d_alias, d_inode
56  *
57  * Ordering:
58  * dentry->d_inode->i_lock
59  *   dentry->d_lock
60  *     dentry->d_sb->s_dentry_lru_lock
61  *     dcache_hash_bucket lock
62  *     s_roots lock
63  *
64  * If there is an ancestor relationship:
65  * dentry->d_parent->...->d_parent->d_lock
66  *   ...
67  *     dentry->d_parent->d_lock
68  *       dentry->d_lock
69  *
70  * If no ancestor relationship:
71  * arbitrary, since it's serialized on rename_lock
72  */
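/*
 * For example, honouring the ordering above means i_lock must be taken
 * before d_lock.  Code that already holds d_lock and then discovers it
 * needs the inode lock has to trylock, or drop and re-acquire in the
 * right order:
 *
 *	spin_unlock(&dentry->d_lock);
 *	spin_lock(&inode->i_lock);
 *	spin_lock(&dentry->d_lock);
 *
 * which is the "slow_positive" path taken by dentry_kill() below.
 */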
73 int sysctl_vfs_cache_pressure __read_mostly = 100;
74 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
75
76 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
77
78 EXPORT_SYMBOL(rename_lock);
79
80 static struct kmem_cache *dentry_cache __read_mostly;
81
82 const struct qstr empty_name = QSTR_INIT("", 0);
83 EXPORT_SYMBOL(empty_name);
84 const struct qstr slash_name = QSTR_INIT("/", 1);
85 EXPORT_SYMBOL(slash_name);
86
87 /*
88  * This is the single most critical data structure when it comes
89  * to the dcache: the hashtable for lookups. Somebody should try
90  * to make this good - I've just made it work.
91  *
92  * This hash-function tries to avoid losing too many bits of hash
93  * information, yet avoid using a prime hash-size or similar.
94  */
95
96 static unsigned int d_hash_shift __read_mostly;
97
98 static struct hlist_bl_head *dentry_hashtable __read_mostly;
99
100 static inline struct hlist_bl_head *d_hash(unsigned int hash)
101 {
102         return dentry_hashtable + (hash >> d_hash_shift);
103 }
104
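/*
 * A second, much smaller hash table holds dentries that are still in
 * the middle of a lookup (d_in_lookup()).  It is keyed on the parent
 * dentry and the name hash, so concurrent lookups of the same name in
 * the same directory can find the in-flight dentry and wait for it
 * instead of creating a duplicate.
 */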
105 #define IN_LOOKUP_SHIFT 10
106 static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
107
108 static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
109                                         unsigned int hash)
110 {
111         hash += (unsigned long) parent / L1_CACHE_BYTES;
112         return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
113 }
114
115
116 /* Statistics gathering. */
117 struct dentry_stat_t dentry_stat = {
118         .age_limit = 45,
119 };
120
121 static DEFINE_PER_CPU(long, nr_dentry);
122 static DEFINE_PER_CPU(long, nr_dentry_unused);
123 static DEFINE_PER_CPU(long, nr_dentry_negative);
124
125 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
126
127 /*
128  * Here we resort to our own counters instead of using generic per-cpu counters
129  * for consistency with what the vfs inode code does. We expect to get
130  * better code and performance by having our own specialized counters.
131  *
132  * Please note that the loop is done over all possible CPUs, not over all online
133  * CPUs. The reason for this is that we don't want to play games with CPUs going
134  * on and off. If a CPU goes offline, we simply keep its counters.
135  *
136  * glommer: See cffbc8a for details, and if you ever intend to change this,
137  * please update all vfs counters to match.
138  */
139 static long get_nr_dentry(void)
140 {
141         int i;
142         long sum = 0;
143         for_each_possible_cpu(i)
144                 sum += per_cpu(nr_dentry, i);
145         return sum < 0 ? 0 : sum;
146 }
147
148 static long get_nr_dentry_unused(void)
149 {
150         int i;
151         long sum = 0;
152         for_each_possible_cpu(i)
153                 sum += per_cpu(nr_dentry_unused, i);
154         return sum < 0 ? 0 : sum;
155 }
156
157 static long get_nr_dentry_negative(void)
158 {
159         int i;
160         long sum = 0;
161
162         for_each_possible_cpu(i)
163                 sum += per_cpu(nr_dentry_negative, i);
164         return sum < 0 ? 0 : sum;
165 }
166
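/*
 * These totals feed the fs.dentry-state sysctl (/proc/sys/fs/dentry-state):
 * the counters are summed from the per-cpu variables on every read, so
 * the reported numbers are only approximate snapshots.
 */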
167 int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
168                    size_t *lenp, loff_t *ppos)
169 {
170         dentry_stat.nr_dentry = get_nr_dentry();
171         dentry_stat.nr_unused = get_nr_dentry_unused();
172         dentry_stat.nr_negative = get_nr_dentry_negative();
173         return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
174 }
175 #endif
176
177 /*
178  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
179  * The strings are both count bytes long, and count is non-zero.
180  */
181 #ifdef CONFIG_DCACHE_WORD_ACCESS
182
183 #include <asm/word-at-a-time.h>
184 /*
185  * NOTE! 'cs' and 'scount' come from a dentry, so it has an
186  * aligned allocation for this particular component. We don't
187  * strictly need the load_unaligned_zeropad() safety, but it
188  * doesn't hurt either.
189  *
190  * In contrast, 'ct' and 'tcount' can be from a pathname, and do
191  * need the careful unaligned handling.
192  */
193 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
194 {
195         unsigned long a,b,mask;
196
197         for (;;) {
198                 a = read_word_at_a_time(cs);
199                 b = load_unaligned_zeropad(ct);
200                 if (tcount < sizeof(unsigned long))
201                         break;
202                 if (unlikely(a != b))
203                         return 1;
204                 cs += sizeof(unsigned long);
205                 ct += sizeof(unsigned long);
206                 tcount -= sizeof(unsigned long);
207                 if (!tcount)
208                         return 0;
209         }
210         mask = bytemask_from_count(tcount);
211         return unlikely(!!((a ^ b) & mask));
212 }
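/*
 * Worked example of the final masked compare, assuming a little-endian
 * 64-bit machine: with tcount == 3 left over, bytemask_from_count(3)
 * is 0x0000000000ffffff, so only the low three bytes of the last words
 * loaded from 'cs' and 'ct' participate in the comparison and anything
 * past the end of the name is ignored.
 */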
213
214 #else
215
216 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
217 {
218         do {
219                 if (*cs != *ct)
220                         return 1;
221                 cs++;
222                 ct++;
223                 tcount--;
224         } while (tcount);
225         return 0;
226 }
227
228 #endif
229
230 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
231 {
232         /*
233          * Be careful about RCU walk racing with rename:
234          * use 'READ_ONCE' to fetch the name pointer.
235          *
236          * NOTE! Even if a rename will mean that the length
237          * was not loaded atomically, we don't care. The
238          * RCU walk will check the sequence count eventually,
239          * and catch it. And we won't overrun the buffer,
240          * because we're reading the name pointer atomically,
241          * and a dentry name is guaranteed to be properly
242          * terminated with a NUL byte.
243          *
244          * End result: even if 'len' is wrong, we'll exit
245          * early because the data cannot match (there can
246          * be no NUL in the ct/tcount data)
247          */
248         const unsigned char *cs = READ_ONCE(dentry->d_name.name);
249
250         return dentry_string_cmp(cs, ct, tcount);
251 }
252
253 struct external_name {
254         union {
255                 atomic_t count;
256                 struct rcu_head head;
257         } u;
258         unsigned char name[];
259 };
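/*
 * Names that do not fit in the inline d_iname array are stored out of
 * line in one of these, with d_name.name pointing at ->name[].  The
 * refcount allows the name to be shared with a name snapshot (see
 * take_dentry_name_snapshot() below) and the rcu_head lets it be freed
 * only after a grace period once the last user is gone.
 */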
260
261 static inline struct external_name *external_name(struct dentry *dentry)
262 {
263         return container_of(dentry->d_name.name, struct external_name, name[0]);
264 }
265
266 static void __d_free(struct rcu_head *head)
267 {
268         struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
269
270         kmem_cache_free(dentry_cache, dentry); 
271 }
272
273 static void __d_free_external(struct rcu_head *head)
274 {
275         struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
276         kfree(external_name(dentry));
277         kmem_cache_free(dentry_cache, dentry);
278 }
279
280 static inline int dname_external(const struct dentry *dentry)
281 {
282         return dentry->d_name.name != dentry->d_iname;
283 }
284
285 void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
286 {
287         spin_lock(&dentry->d_lock);
288         if (unlikely(dname_external(dentry))) {
289                 struct external_name *p = external_name(dentry);
290                 atomic_inc(&p->u.count);
291                 spin_unlock(&dentry->d_lock);
292                 name->name = p->name;
293         } else {
294                 memcpy(name->inline_name, dentry->d_iname,
295                        dentry->d_name.len + 1);
296                 spin_unlock(&dentry->d_lock);
297                 name->name = name->inline_name;
298         }
299 }
300 EXPORT_SYMBOL(take_dentry_name_snapshot);
301
302 void release_dentry_name_snapshot(struct name_snapshot *name)
303 {
304         if (unlikely(name->name != name->inline_name)) {
305                 struct external_name *p;
306                 p = container_of(name->name, struct external_name, name[0]);
307                 if (unlikely(atomic_dec_and_test(&p->u.count)))
308                         kfree_rcu(p, u.head);
309         }
310 }
311 EXPORT_SYMBOL(release_dentry_name_snapshot);
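/*
 * A minimal usage sketch, e.g. for code like fsnotify that needs a
 * stable copy of the name across a possible concurrent rename:
 *
 *	struct name_snapshot name;
 *
 *	take_dentry_name_snapshot(&name, dentry);
 *	... use name.name without holding dentry->d_lock ...
 *	release_dentry_name_snapshot(&name);
 */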
312
313 static inline void __d_set_inode_and_type(struct dentry *dentry,
314                                           struct inode *inode,
315                                           unsigned type_flags)
316 {
317         unsigned flags;
318
319         dentry->d_inode = inode;
320         flags = READ_ONCE(dentry->d_flags);
321         flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
322         flags |= type_flags;
323         WRITE_ONCE(dentry->d_flags, flags);
324 }
325
326 static inline void __d_clear_type_and_inode(struct dentry *dentry)
327 {
328         unsigned flags = READ_ONCE(dentry->d_flags);
329
330         flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
331         WRITE_ONCE(dentry->d_flags, flags);
332         dentry->d_inode = NULL;
333         if (dentry->d_flags & DCACHE_LRU_LIST)
334                 this_cpu_inc(nr_dentry_negative);
335 }
336
337 static void dentry_free(struct dentry *dentry)
338 {
339         WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
340         if (unlikely(dname_external(dentry))) {
341                 struct external_name *p = external_name(dentry);
342                 if (likely(atomic_dec_and_test(&p->u.count))) {
343                         call_rcu(&dentry->d_u.d_rcu, __d_free_external);
344                         return;
345                 }
346         }
347         /* if dentry was never visible to RCU, immediate free is OK */
348         if (!(dentry->d_flags & DCACHE_RCUACCESS))
349                 __d_free(&dentry->d_u.d_rcu);
350         else
351                 call_rcu(&dentry->d_u.d_rcu, __d_free);
352 }
353
354 /*
355  * Release the dentry's inode, using the filesystem
356  * d_iput() operation if defined.
357  */
358 static void dentry_unlink_inode(struct dentry * dentry)
359         __releases(dentry->d_lock)
360         __releases(dentry->d_inode->i_lock)
361 {
362         struct inode *inode = dentry->d_inode;
363
364         raw_write_seqcount_begin(&dentry->d_seq);
365         __d_clear_type_and_inode(dentry);
366         hlist_del_init(&dentry->d_u.d_alias);
367         raw_write_seqcount_end(&dentry->d_seq);
368         spin_unlock(&dentry->d_lock);
369         spin_unlock(&inode->i_lock);
370         if (!inode->i_nlink)
371                 fsnotify_inoderemove(inode);
372         if (dentry->d_op && dentry->d_op->d_iput)
373                 dentry->d_op->d_iput(dentry, inode);
374         else
375                 iput(inode);
376 }
377
378 /*
379  * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
380  * is in use - which includes both the "real" per-superblock
381  * LRU list _and_ the DCACHE_SHRINK_LIST use.
382  *
383  * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
384  * on the shrink list (ie not on the superblock LRU list).
385  *
386  * The per-cpu "nr_dentry_unused" counters are updated with
387  * the DCACHE_LRU_LIST bit.
388  *
389  * The per-cpu "nr_dentry_negative" counters are only updated
390  * when deleted from or added to the per-superblock LRU list, not
391  * from/to the shrink list. That is to avoid an unneeded dec/inc
392  * pair when moving from LRU to shrink list in select_collect().
393  *
394  * These helper functions make sure we always follow the
395  * rules. d_lock must be held by the caller.
396  */
397 #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
398 static void d_lru_add(struct dentry *dentry)
399 {
400         D_FLAG_VERIFY(dentry, 0);
401         dentry->d_flags |= DCACHE_LRU_LIST;
402         this_cpu_inc(nr_dentry_unused);
403         if (d_is_negative(dentry))
404                 this_cpu_inc(nr_dentry_negative);
405         WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
406 }
407
408 static void d_lru_del(struct dentry *dentry)
409 {
410         D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
411         dentry->d_flags &= ~DCACHE_LRU_LIST;
412         this_cpu_dec(nr_dentry_unused);
413         if (d_is_negative(dentry))
414                 this_cpu_dec(nr_dentry_negative);
415         WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
416 }
417
418 static void d_shrink_del(struct dentry *dentry)
419 {
420         D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
421         list_del_init(&dentry->d_lru);
422         dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
423         this_cpu_dec(nr_dentry_unused);
424 }
425
426 static void d_shrink_add(struct dentry *dentry, struct list_head *list)
427 {
428         D_FLAG_VERIFY(dentry, 0);
429         list_add(&dentry->d_lru, list);
430         dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
431         this_cpu_inc(nr_dentry_unused);
432 }
433
434 /*
435  * These can only be called under the global LRU lock, ie during the
436  * callback for freeing the LRU list. "isolate" removes it from the
437  * LRU lists entirely, while shrink_move moves it to the indicated
438  * private list.
439  */
440 static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
441 {
442         D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
443         dentry->d_flags &= ~DCACHE_LRU_LIST;
444         this_cpu_dec(nr_dentry_unused);
445         if (d_is_negative(dentry))
446                 this_cpu_dec(nr_dentry_negative);
447         list_lru_isolate(lru, &dentry->d_lru);
448 }
449
450 static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
451                               struct list_head *list)
452 {
453         D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
454         dentry->d_flags |= DCACHE_SHRINK_LIST;
455         if (d_is_negative(dentry))
456                 this_cpu_dec(nr_dentry_negative);
457         list_lru_isolate_move(lru, &dentry->d_lru, list);
458 }
459
460 /**
461  * d_drop - drop a dentry
462  * @dentry: dentry to drop
463  *
464  * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
465  * be found through a VFS lookup any more. Note that this is different from
466  * deleting the dentry - d_delete will try to mark the dentry negative if
467  * possible, giving a successful _negative_ lookup, while d_drop will
468  * just make the cache lookup fail.
469  *
470  * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
471  * reason (NFS timeouts or autofs deletes).
472  *
473  * __d_drop requires dentry->d_lock
474  * ___d_drop doesn't mark dentry as "unhashed"
475  *   (dentry->d_hash.pprev will be LIST_POISON2, not NULL).
476  */
477 static void ___d_drop(struct dentry *dentry)
478 {
479         struct hlist_bl_head *b;
480         /*
481          * Hashed dentries are normally on the dentry hashtable,
482          * with the exception of those newly allocated by
483          * d_obtain_root, which are always IS_ROOT:
484          */
485         if (unlikely(IS_ROOT(dentry)))
486                 b = &dentry->d_sb->s_roots;
487         else
488                 b = d_hash(dentry->d_name.hash);
489
490         hlist_bl_lock(b);
491         __hlist_bl_del(&dentry->d_hash);
492         hlist_bl_unlock(b);
493 }
494
495 void __d_drop(struct dentry *dentry)
496 {
497         if (!d_unhashed(dentry)) {
498                 ___d_drop(dentry);
499                 dentry->d_hash.pprev = NULL;
500                 write_seqcount_invalidate(&dentry->d_seq);
501         }
502 }
503 EXPORT_SYMBOL(__d_drop);
504
505 void d_drop(struct dentry *dentry)
506 {
507         spin_lock(&dentry->d_lock);
508         __d_drop(dentry);
509         spin_unlock(&dentry->d_lock);
510 }
511 EXPORT_SYMBOL(d_drop);
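/*
 * A minimal usage sketch (object_is_stale() is purely illustrative):
 * a network filesystem that notices the backing object has disappeared
 * can unhash the dentry so that the next lookup goes back to the
 * filesystem instead of hitting the stale cache entry:
 *
 *	if (object_is_stale(dentry))
 *		d_drop(dentry);
 */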
512
513 static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
514 {
515         struct dentry *next;
516         /*
517          * Inform d_walk() and shrink_dentry_list() that we are no longer
518          * attached to the dentry tree
519          */
520         dentry->d_flags |= DCACHE_DENTRY_KILLED;
521         if (unlikely(list_empty(&dentry->d_child)))
522                 return;
523         __list_del_entry(&dentry->d_child);
524         /*
525          * Cursors can move around the list of children.  While we'd been
526          * a normal list member, it didn't matter - ->d_child.next would've
527          * been updated.  However, from now on it won't be and for the
528          * things like d_walk() it might end up with a nasty surprise.
529          * Normally d_walk() doesn't care about cursors moving around -
530          * ->d_lock on parent prevents that and since a cursor has no children
531          * of its own, we get through it without ever unlocking the parent.
532          * There is one exception, though - if we ascend from a child that
533          * gets killed as soon as we unlock it, the next sibling is found
534          * using the value left in its ->d_child.next.  And if _that_
535          * pointed to a cursor, and cursor got moved (e.g. by lseek())
536          * before d_walk() regains parent->d_lock, we'll end up skipping
537          * everything the cursor had been moved past.
538          *
539          * Solution: make sure that the pointer left behind in ->d_child.next
540          * points to something that won't be moving around.  I.e. skip the
541          * cursors.
542          */
543         while (dentry->d_child.next != &parent->d_subdirs) {
544                 next = list_entry(dentry->d_child.next, struct dentry, d_child);
545                 if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
546                         break;
547                 dentry->d_child.next = next->d_child.next;
548         }
549 }
550
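/*
 * Tear down a dentry whose last reference is gone.  Called with the
 * dentry's d_lock held, plus the parent's d_lock and the inode's i_lock
 * where those exist; all of them are released here.  The dentry is
 * unhashed, unlinked from its parent's list of children and, unless it
 * sits on somebody's shrink list (in which case DCACHE_MAY_FREE is set
 * and the shrinker frees it later), handed to dentry_free().
 */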
551 static void __dentry_kill(struct dentry *dentry)
552 {
553         struct dentry *parent = NULL;
554         bool can_free = true;
555         if (!IS_ROOT(dentry))
556                 parent = dentry->d_parent;
557
558         /*
559          * The dentry is now unrecoverably dead to the world.
560          */
561         lockref_mark_dead(&dentry->d_lockref);
562
563         /*
564          * inform the fs via d_prune that this dentry is about to be
565          * unhashed and destroyed.
566          */
567         if (dentry->d_flags & DCACHE_OP_PRUNE)
568                 dentry->d_op->d_prune(dentry);
569
570         if (dentry->d_flags & DCACHE_LRU_LIST) {
571                 if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
572                         d_lru_del(dentry);
573         }
574         /* if it was on the hash then remove it */
575         __d_drop(dentry);
576         dentry_unlist(dentry, parent);
577         if (parent)
578                 spin_unlock(&parent->d_lock);
579         if (dentry->d_inode)
580                 dentry_unlink_inode(dentry);
581         else
582                 spin_unlock(&dentry->d_lock);
583         this_cpu_dec(nr_dentry);
584         if (dentry->d_op && dentry->d_op->d_release)
585                 dentry->d_op->d_release(dentry);
586
587         spin_lock(&dentry->d_lock);
588         if (dentry->d_flags & DCACHE_SHRINK_LIST) {
589                 dentry->d_flags |= DCACHE_MAY_FREE;
590                 can_free = false;
591         }
592         spin_unlock(&dentry->d_lock);
593         if (likely(can_free))
594                 dentry_free(dentry);
595         cond_resched();
596 }
597
598 static struct dentry *__lock_parent(struct dentry *dentry)
599 {
600         struct dentry *parent;
601         rcu_read_lock();
602         spin_unlock(&dentry->d_lock);
603 again:
604         parent = READ_ONCE(dentry->d_parent);
605         spin_lock(&parent->d_lock);
606         /*
607          * We can't blindly lock dentry until we are sure
608          * that we won't violate the locking order.
609          * Any changes of dentry->d_parent must have
610          * been done with parent->d_lock held, so
611          * spin_lock() above is enough of a barrier
612          * for checking if it's still our child.
613          */
614         if (unlikely(parent != dentry->d_parent)) {
615                 spin_unlock(&parent->d_lock);
616                 goto again;
617         }
618         rcu_read_unlock();
619         if (parent != dentry)
620                 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
621         else
622                 parent = NULL;
623         return parent;
624 }
625
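/*
 * Lock the parent of a dentry whose d_lock we are already holding.
 * Returns the locked parent, or NULL for an IS_ROOT dentry.  If the
 * trylock fails, __lock_parent() may temporarily drop dentry->d_lock,
 * so callers must re-check anything derived from the dentry (such as
 * its refcount) afterwards.
 */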
626 static inline struct dentry *lock_parent(struct dentry *dentry)
627 {
628         struct dentry *parent = dentry->d_parent;
629         if (IS_ROOT(dentry))
630                 return NULL;
631         if (likely(spin_trylock(&parent->d_lock)))
632                 return parent;
633         return __lock_parent(dentry);
634 }
635
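/*
 * Called with dentry->d_lock held when the last reference is about to
 * be dropped: decide whether the dentry is worth keeping as LRU fodder.
 * Returns true (after dropping the count and making sure the dentry is
 * on the LRU list) if it is still hashed, not disconnected and
 * ->d_delete() does not object; false tells the caller to kill it.
 */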
636 static inline bool retain_dentry(struct dentry *dentry)
637 {
638         WARN_ON(d_in_lookup(dentry));
639
640         /* Unreachable? Get rid of it */
641         if (unlikely(d_unhashed(dentry)))
642                 return false;
643
644         if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
645                 return false;
646
647         if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
648                 if (dentry->d_op->d_delete(dentry))
649                         return false;
650         }
651         /* retain; LRU fodder */
652         dentry->d_lockref.count--;
653         if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
654                 d_lru_add(dentry);
655         else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
656                 dentry->d_flags |= DCACHE_REFERENCED;
657         return true;
658 }
659
660 /*
661  * Finish off a dentry we've decided to kill.
662  * dentry->d_lock must be held, returns with it unlocked.
663  * Returns dentry requiring refcount drop, or NULL if we're done.
664  */
665 static struct dentry *dentry_kill(struct dentry *dentry)
666         __releases(dentry->d_lock)
667 {
668         struct inode *inode = dentry->d_inode;
669         struct dentry *parent = NULL;
670
671         if (inode && unlikely(!spin_trylock(&inode->i_lock)))
672                 goto slow_positive;
673
674         if (!IS_ROOT(dentry)) {
675                 parent = dentry->d_parent;
676                 if (unlikely(!spin_trylock(&parent->d_lock))) {
677                         parent = __lock_parent(dentry);
678                         if (likely(inode || !dentry->d_inode))
679                                 goto got_locks;
680                         /* negative that became positive */
681                         if (parent)
682                                 spin_unlock(&parent->d_lock);
683                         inode = dentry->d_inode;
684                         goto slow_positive;
685                 }
686         }
687         __dentry_kill(dentry);
688         return parent;
689
690 slow_positive:
691         spin_unlock(&dentry->d_lock);
692         spin_lock(&inode->i_lock);
693         spin_lock(&dentry->d_lock);
694         parent = lock_parent(dentry);
695 got_locks:
696         if (unlikely(dentry->d_lockref.count != 1)) {
697                 dentry->d_lockref.count--;
698         } else if (likely(!retain_dentry(dentry))) {
699                 __dentry_kill(dentry);
700                 return parent;
701         }
702         /* we are keeping it, after all */
703         if (inode)
704                 spin_unlock(&inode->i_lock);
705         if (parent)
706                 spin_unlock(&parent->d_lock);
707         spin_unlock(&dentry->d_lock);
708         return NULL;
709 }
710
711 /*
712  * Try to do a lockless dput(), and return whether that was successful.
713  *
714  * If unsuccessful, we return false, having already taken the dentry lock.
715  *
716  * The caller needs to hold the RCU read lock, so that the dentry is
717  * guaranteed to stay around even if the refcount goes down to zero!
718  */
719 static inline bool fast_dput(struct dentry *dentry)
720 {
721         int ret;
722         unsigned int d_flags;
723
724         /*
725          * If we have a d_op->d_delete() operation, we should not
726          * let the dentry count go to zero, so use "put_or_lock".
727          */
728         if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
729                 return lockref_put_or_lock(&dentry->d_lockref);
730
731         /*
732          * .. otherwise, we can try to just decrement the
733          * lockref optimistically.
734          */
735         ret = lockref_put_return(&dentry->d_lockref);
736
737         /*
738          * If the lockref_put_return() failed due to the lock being held
739          * by somebody else, the fast path has failed. We will need to
740          * get the lock, and then check the count again.
741          */
742         if (unlikely(ret < 0)) {
743                 spin_lock(&dentry->d_lock);
744                 if (dentry->d_lockref.count > 1) {
745                         dentry->d_lockref.count--;
746                         spin_unlock(&dentry->d_lock);
747                         return true;
748                 }
749                 return false;
750         }
751
752         /*
753          * If we weren't the last ref, we're done.
754          */
755         if (ret)
756                 return true;
757
758         /*
759          * Careful, careful. The reference count went down
760          * to zero, but we don't hold the dentry lock, so
761          * somebody else could get it again, and do another
762          * dput(), and we need to not race with that.
763          *
764          * However, there is a very special and common case
765          * where we don't care, because there is nothing to
766          * do: the dentry is still hashed, it does not have
767          * a 'delete' op, and it's referenced and already on
768          * the LRU list.
769          *
770          * NOTE! Since we aren't locked, these values are
771          * not "stable". However, it is sufficient that at
772          * some point after we dropped the reference the
773          * dentry was hashed and the flags had the proper
774          * value. Other dentry users may have re-gotten
775          * a reference to the dentry and change that, but
776          * our work is done - we can leave the dentry
777          * around with a zero refcount.
778          */
779         smp_rmb();
780         d_flags = READ_ONCE(dentry->d_flags);
781         d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
782
783         /* Nothing to do? Dropping the reference was all we needed? */
784         if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
785                 return true;
786
787         /*
788          * Not the fast normal case? Get the lock. We've already decremented
789          * the refcount, but we'll need to re-check the situation after
790          * getting the lock.
791          */
792         spin_lock(&dentry->d_lock);
793
794         /*
795          * Did somebody else grab a reference to it in the meantime, and
796          * we're no longer the last user after all? Alternatively, somebody
797          * else could have killed it and marked it dead. Either way, we
798          * don't need to do anything else.
799          */
800         if (dentry->d_lockref.count) {
801                 spin_unlock(&dentry->d_lock);
802                 return true;
803         }
804
805         /*
806          * Re-get the reference we optimistically dropped. We hold the
807          * lock, and we just tested that it was zero, so we can just
808          * set it to 1.
809          */
810         dentry->d_lockref.count = 1;
811         return false;
812 }
813
814
815 /* 
816  * This is dput
817  *
818  * This is complicated by the fact that we do not want to put
819  * dentries that are no longer on any hash chain on the unused
820  * list: we'd much rather just get rid of them immediately.
821  *
822  * However, that implies that we have to traverse the dentry
823  * tree upwards to the parents which might _also_ now be
824  * scheduled for deletion (it may have been only waiting for
825  * its last child to go away).
826  *
827  * This tail recursion is done by hand as we don't want to depend
828  * on the compiler to always get this right (gcc generally doesn't).
829  * Real recursion would eat up our stack space.
830  */
831
832 /*
833  * dput - release a dentry
834  * @dentry: dentry to release 
835  *
836  * Release a dentry. This will drop the usage count and if appropriate
837  * call the dentry unlink method as well as removing it from the queues and
838  * releasing its resources. If the parent dentries were scheduled for release
839  * they too may now get deleted.
840  */
841 void dput(struct dentry *dentry)
842 {
843         while (dentry) {
844                 might_sleep();
845
846                 rcu_read_lock();
847                 if (likely(fast_dput(dentry))) {
848                         rcu_read_unlock();
849                         return;
850                 }
851
852                 /* Slow case: now with the dentry lock held */
853                 rcu_read_unlock();
854
855                 if (likely(retain_dentry(dentry))) {
856                         spin_unlock(&dentry->d_lock);
857                         return;
858                 }
859
860                 dentry = dentry_kill(dentry);
861         }
862 }
863 EXPORT_SYMBOL(dput);
864
865
866 /* This must be called with d_lock held */
867 static inline void __dget_dlock(struct dentry *dentry)
868 {
869         dentry->d_lockref.count++;
870 }
871
872 static inline void __dget(struct dentry *dentry)
873 {
874         lockref_get(&dentry->d_lockref);
875 }
876
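/*
 * dget_parent - pin a dentry's parent
 *
 * Takes a reference on dentry->d_parent in a way that is safe against a
 * concurrent rename: first an optimistic lockref_get_not_zero() under
 * RCU followed by a re-check of d_parent, falling back to taking the
 * parent's d_lock when that races.
 */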
877 struct dentry *dget_parent(struct dentry *dentry)
878 {
879         int gotref;
880         struct dentry *ret;
881
882         /*
883          * Do optimistic parent lookup without any
884          * locking.
885          */
886         rcu_read_lock();
887         ret = READ_ONCE(dentry->d_parent);
888         gotref = lockref_get_not_zero(&ret->d_lockref);
889         rcu_read_unlock();
890         if (likely(gotref)) {
891                 if (likely(ret == READ_ONCE(dentry->d_parent)))
892                         return ret;
893                 dput(ret);
894         }
895
896 repeat:
897         /*
898          * Don't need rcu_dereference because we re-check it was correct under
899          * the lock.
900          */
901         rcu_read_lock();
902         ret = dentry->d_parent;
903         spin_lock(&ret->d_lock);
904         if (unlikely(ret != dentry->d_parent)) {
905                 spin_unlock(&ret->d_lock);
906                 rcu_read_unlock();
907                 goto repeat;
908         }
909         rcu_read_unlock();
910         BUG_ON(!ret->d_lockref.count);
911         ret->d_lockref.count++;
912         spin_unlock(&ret->d_lock);
913         return ret;
914 }
915 EXPORT_SYMBOL(dget_parent);
916
917 static struct dentry * __d_find_any_alias(struct inode *inode)
918 {
919         struct dentry *alias;
920
921         if (hlist_empty(&inode->i_dentry))
922                 return NULL;
923         alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
924         __dget(alias);
925         return alias;
926 }
927
928 /**
929  * d_find_any_alias - find any alias for a given inode
930  * @inode: inode to find an alias for
931  *
932  * If any aliases exist for the given inode, take and return a
933  * reference for one of them.  If no aliases exist, return %NULL.
934  */
935 struct dentry *d_find_any_alias(struct inode *inode)
936 {
937         struct dentry *de;
938
939         spin_lock(&inode->i_lock);
940         de = __d_find_any_alias(inode);
941         spin_unlock(&inode->i_lock);
942         return de;
943 }
944 EXPORT_SYMBOL(d_find_any_alias);
945
946 /**
947  * d_find_alias - grab a hashed alias of inode
948  * @inode: inode in question
949  *
950  * If inode has a hashed alias, or is a directory and has any alias,
951  * acquire the reference to alias and return it. Otherwise return NULL.
952  * Notice that if inode is a directory there can be only one alias and
953  * it can be unhashed only if it has no children, or if it is the root
954  * of a filesystem, or if the directory was renamed and d_revalidate
955  * was the first vfs operation to notice.
956  *
957  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
958  * any other hashed alias over that one.
959  */
960 static struct dentry *__d_find_alias(struct inode *inode)
961 {
962         struct dentry *alias;
963
964         if (S_ISDIR(inode->i_mode))
965                 return __d_find_any_alias(inode);
966
967         hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
968                 spin_lock(&alias->d_lock);
969                 if (!d_unhashed(alias)) {
970                         __dget_dlock(alias);
971                         spin_unlock(&alias->d_lock);
972                         return alias;
973                 }
974                 spin_unlock(&alias->d_lock);
975         }
976         return NULL;
977 }
978
979 struct dentry *d_find_alias(struct inode *inode)
980 {
981         struct dentry *de = NULL;
982
983         if (!hlist_empty(&inode->i_dentry)) {
984                 spin_lock(&inode->i_lock);
985                 de = __d_find_alias(inode);
986                 spin_unlock(&inode->i_lock);
987         }
988         return de;
989 }
990 EXPORT_SYMBOL(d_find_alias);
991
992 /*
993  *      Try to kill dentries associated with this inode.
994  * WARNING: you must own a reference to inode.
995  */
996 void d_prune_aliases(struct inode *inode)
997 {
998         struct dentry *dentry;
999 restart:
1000         spin_lock(&inode->i_lock);
1001         hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
1002                 spin_lock(&dentry->d_lock);
1003                 if (!dentry->d_lockref.count) {
1004                         struct dentry *parent = lock_parent(dentry);
1005                         if (likely(!dentry->d_lockref.count)) {
1006                                 __dentry_kill(dentry);
1007                                 dput(parent);
1008                                 goto restart;
1009                         }
1010                         if (parent)
1011                                 spin_unlock(&parent->d_lock);
1012                 }
1013                 spin_unlock(&dentry->d_lock);
1014         }
1015         spin_unlock(&inode->i_lock);
1016 }
1017 EXPORT_SYMBOL(d_prune_aliases);
1018
1019 /*
1020  * Lock a dentry from shrink list.
1021  * Called under rcu_read_lock() and dentry->d_lock; the former
1022  * guarantees that nothing we access will be freed under us.
1023  * Note that dentry is *not* protected from concurrent dentry_kill(),
1024  * d_delete(), etc.
1025  *
1026  * Return false if dentry has been disrupted or grabbed, leaving
1027  * the caller to kick it off-list.  Otherwise, return true and have
1028  * that dentry's inode and parent both locked.
1029  */
1030 static bool shrink_lock_dentry(struct dentry *dentry)
1031 {
1032         struct inode *inode;
1033         struct dentry *parent;
1034
1035         if (dentry->d_lockref.count)
1036                 return false;
1037
1038         inode = dentry->d_inode;
1039         if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
1040                 spin_unlock(&dentry->d_lock);
1041                 spin_lock(&inode->i_lock);
1042                 spin_lock(&dentry->d_lock);
1043                 if (unlikely(dentry->d_lockref.count))
1044                         goto out;
1045                 /* changed inode means that somebody had grabbed it */
1046                 if (unlikely(inode != dentry->d_inode))
1047                         goto out;
1048         }
1049
1050         parent = dentry->d_parent;
1051         if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
1052                 return true;
1053
1054         spin_unlock(&dentry->d_lock);
1055         spin_lock(&parent->d_lock);
1056         if (unlikely(parent != dentry->d_parent)) {
1057                 spin_unlock(&parent->d_lock);
1058                 spin_lock(&dentry->d_lock);
1059                 goto out;
1060         }
1061         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1062         if (likely(!dentry->d_lockref.count))
1063                 return true;
1064         spin_unlock(&parent->d_lock);
1065 out:
1066         if (inode)
1067                 spin_unlock(&inode->i_lock);
1068         return false;
1069 }
1070
1071 static void shrink_dentry_list(struct list_head *list)
1072 {
1073         while (!list_empty(list)) {
1074                 struct dentry *dentry, *parent;
1075
1076                 dentry = list_entry(list->prev, struct dentry, d_lru);
1077                 spin_lock(&dentry->d_lock);
1078                 rcu_read_lock();
1079                 if (!shrink_lock_dentry(dentry)) {
1080                         bool can_free = false;
1081                         rcu_read_unlock();
1082                         d_shrink_del(dentry);
1083                         if (dentry->d_lockref.count < 0)
1084                                 can_free = dentry->d_flags & DCACHE_MAY_FREE;
1085                         spin_unlock(&dentry->d_lock);
1086                         if (can_free)
1087                                 dentry_free(dentry);
1088                         continue;
1089                 }
1090                 rcu_read_unlock();
1091                 d_shrink_del(dentry);
1092                 parent = dentry->d_parent;
1093                 __dentry_kill(dentry);
1094                 if (parent == dentry)
1095                         continue;
1096                 /*
1097                  * We need to prune ancestors too. This is necessary to prevent
1098                  * quadratic behavior of shrink_dcache_parent(), but is also
1099                  * expected to be beneficial in reducing dentry cache
1100                  * fragmentation.
1101                  */
1102                 dentry = parent;
1103                 while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
1104                         dentry = dentry_kill(dentry);
1105         }
1106 }
1107
1108 static enum lru_status dentry_lru_isolate(struct list_head *item,
1109                 struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1110 {
1111         struct list_head *freeable = arg;
1112         struct dentry   *dentry = container_of(item, struct dentry, d_lru);
1113
1114
1115         /*
1116          * we are inverting the lru lock/dentry->d_lock here,
1117          * so use a trylock. If we fail to get the lock, just skip
1118          * it
1119          */
1120         if (!spin_trylock(&dentry->d_lock))
1121                 return LRU_SKIP;
1122
1123         /*
1124          * Referenced dentries are still in use. If they have active
1125          * counts, just remove them from the LRU. Otherwise give them
1126          * another pass through the LRU.
1127          */
1128         if (dentry->d_lockref.count) {
1129                 d_lru_isolate(lru, dentry);
1130                 spin_unlock(&dentry->d_lock);
1131                 return LRU_REMOVED;
1132         }
1133
1134         if (dentry->d_flags & DCACHE_REFERENCED) {
1135                 dentry->d_flags &= ~DCACHE_REFERENCED;
1136                 spin_unlock(&dentry->d_lock);
1137
1138                 /*
1139                  * The list move itself will be made by the common LRU code. At
1140                  * this point, we've dropped the dentry->d_lock but keep the
1141                  * lru lock. This is safe to do, since every list movement is
1142                  * protected by the lru lock even if both locks are held.
1143                  *
1144                  * This is guaranteed by the fact that all LRU management
1145                  * functions are intermediated by the LRU API calls like
1146                  * list_lru_add and list_lru_del. List movement in this file
1147                  * only ever occurs through these functions or through callbacks
1148                  * like this one, which are called from the LRU API.
1149                  *
1150                  * The only exceptions to this are functions like
1151                  * shrink_dentry_list, and code that first checks for the
1152                  * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
1153                  * operating only with stack provided lists after they are
1154                  * properly isolated from the main list.  It is thus always a
1155                  * local access.
1156                  */
1157                 return LRU_ROTATE;
1158         }
1159
1160         d_lru_shrink_move(lru, dentry, freeable);
1161         spin_unlock(&dentry->d_lock);
1162
1163         return LRU_REMOVED;
1164 }
1165
1166 /**
1167  * prune_dcache_sb - shrink the dcache
1168  * @sb: superblock
1169  * @sc: shrink control, passed to list_lru_shrink_walk()
1170  *
1171  * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This
1172  * is done when we need more memory and called from the superblock shrinker
1173  * function.
1174  *
1175  * This function may fail to free any resources if all the dentries are in
1176  * use.
1177  */
1178 long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
1179 {
1180         LIST_HEAD(dispose);
1181         long freed;
1182
1183         freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
1184                                      dentry_lru_isolate, &dispose);
1185         shrink_dentry_list(&dispose);
1186         return freed;
1187 }
1188
1189 static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
1190                 struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1191 {
1192         struct list_head *freeable = arg;
1193         struct dentry   *dentry = container_of(item, struct dentry, d_lru);
1194
1195         /*
1196          * we are inverting the lru lock/dentry->d_lock here,
1197          * so use a trylock. If we fail to get the lock, just skip
1198          * it
1199          */
1200         if (!spin_trylock(&dentry->d_lock))
1201                 return LRU_SKIP;
1202
1203         d_lru_shrink_move(lru, dentry, freeable);
1204         spin_unlock(&dentry->d_lock);
1205
1206         return LRU_REMOVED;
1207 }
1208
1209
1210 /**
1211  * shrink_dcache_sb - shrink dcache for a superblock
1212  * @sb: superblock
1213  *
1214  * Shrink the dcache for the specified super block. This is used to free
1215  * the dcache before unmounting a file system.
1216  */
1217 void shrink_dcache_sb(struct super_block *sb)
1218 {
1219         do {
1220                 LIST_HEAD(dispose);
1221
1222                 list_lru_walk(&sb->s_dentry_lru,
1223                         dentry_lru_isolate_shrink, &dispose, 1024);
1224                 shrink_dentry_list(&dispose);
1225         } while (list_lru_count(&sb->s_dentry_lru) > 0);
1226 }
1227 EXPORT_SYMBOL(shrink_dcache_sb);
1228
1229 /**
1230  * enum d_walk_ret - action to take during tree walk
1231  * @D_WALK_CONTINUE:    continue walk
1232  * @D_WALK_QUIT:        quit walk
1233  * @D_WALK_NORETRY:     quit when retry is needed
1234  * @D_WALK_SKIP:        skip this dentry and its children
1235  */
1236 enum d_walk_ret {
1237         D_WALK_CONTINUE,
1238         D_WALK_QUIT,
1239         D_WALK_NORETRY,
1240         D_WALK_SKIP,
1241 };
1242
1243 /**
1244  * d_walk - walk the dentry tree
1245  * @parent:     start of walk
1246  * @data:       data passed to @enter()
1247  * @enter:      callback when first entering the dentry
1248  *
1249  * The @enter() callbacks are called with d_lock held.
1250  */
1251 static void d_walk(struct dentry *parent, void *data,
1252                    enum d_walk_ret (*enter)(void *, struct dentry *))
1253 {
1254         struct dentry *this_parent;
1255         struct list_head *next;
1256         unsigned seq = 0;
1257         enum d_walk_ret ret;
1258         bool retry = true;
1259
1260 again:
1261         read_seqbegin_or_lock(&rename_lock, &seq);
1262         this_parent = parent;
1263         spin_lock(&this_parent->d_lock);
1264
1265         ret = enter(data, this_parent);
1266         switch (ret) {
1267         case D_WALK_CONTINUE:
1268                 break;
1269         case D_WALK_QUIT:
1270         case D_WALK_SKIP:
1271                 goto out_unlock;
1272         case D_WALK_NORETRY:
1273                 retry = false;
1274                 break;
1275         }
1276 repeat:
1277         next = this_parent->d_subdirs.next;
1278 resume:
1279         while (next != &this_parent->d_subdirs) {
1280                 struct list_head *tmp = next;
1281                 struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1282                 next = tmp->next;
1283
1284                 if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
1285                         continue;
1286
1287                 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1288
1289                 ret = enter(data, dentry);
1290                 switch (ret) {
1291                 case D_WALK_CONTINUE:
1292                         break;
1293                 case D_WALK_QUIT:
1294                         spin_unlock(&dentry->d_lock);
1295                         goto out_unlock;
1296                 case D_WALK_NORETRY:
1297                         retry = false;
1298                         break;
1299                 case D_WALK_SKIP:
1300                         spin_unlock(&dentry->d_lock);
1301                         continue;
1302                 }
1303
1304                 if (!list_empty(&dentry->d_subdirs)) {
1305                         spin_unlock(&this_parent->d_lock);
1306                         spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1307                         this_parent = dentry;
1308                         spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1309                         goto repeat;
1310                 }
1311                 spin_unlock(&dentry->d_lock);
1312         }
1313         /*
1314          * All done at this level ... ascend and resume the search.
1315          */
1316         rcu_read_lock();
1317 ascend:
1318         if (this_parent != parent) {
1319                 struct dentry *child = this_parent;
1320                 this_parent = child->d_parent;
1321
1322                 spin_unlock(&child->d_lock);
1323                 spin_lock(&this_parent->d_lock);
1324
1325                 /* might go back up the wrong parent if we have had a rename. */
1326                 if (need_seqretry(&rename_lock, seq))
1327                         goto rename_retry;
1328                 /* go into the first sibling still alive */
1329                 do {
1330                         next = child->d_child.next;
1331                         if (next == &this_parent->d_subdirs)
1332                                 goto ascend;
1333                         child = list_entry(next, struct dentry, d_child);
1334                 } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
1335                 rcu_read_unlock();
1336                 goto resume;
1337         }
1338         if (need_seqretry(&rename_lock, seq))
1339                 goto rename_retry;
1340         rcu_read_unlock();
1341
1342 out_unlock:
1343         spin_unlock(&this_parent->d_lock);
1344         done_seqretry(&rename_lock, seq);
1345         return;
1346
1347 rename_retry:
1348         spin_unlock(&this_parent->d_lock);
1349         rcu_read_unlock();
1350         BUG_ON(seq & 1);
1351         if (!retry)
1352                 return;
1353         seq = 1;
1354         goto again;
1355 }
1356
1357 struct check_mount {
1358         struct vfsmount *mnt;
1359         unsigned int mounted;
1360 };
1361
1362 static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
1363 {
1364         struct check_mount *info = data;
1365         struct path path = { .mnt = info->mnt, .dentry = dentry };
1366
1367         if (likely(!d_mountpoint(dentry)))
1368                 return D_WALK_CONTINUE;
1369         if (__path_is_mountpoint(&path)) {
1370                 info->mounted = 1;
1371                 return D_WALK_QUIT;
1372         }
1373         return D_WALK_CONTINUE;
1374 }
1375
1376 /**
1377  * path_has_submounts - check for mounts over a dentry in the
1378  *                      current namespace.
1379  * @parent: path to check.
1380  *
1381  * Return true if the parent or its subdirectories contain
1382  * a mount point in the current namespace.
1383  */
1384 int path_has_submounts(const struct path *parent)
1385 {
1386         struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
1387
1388         read_seqlock_excl(&mount_lock);
1389         d_walk(parent->dentry, &data, path_check_mount);
1390         read_sequnlock_excl(&mount_lock);
1391
1392         return data.mounted;
1393 }
1394 EXPORT_SYMBOL(path_has_submounts);
1395
1396 /*
1397  * Called by mount code to set a mountpoint and check if the mountpoint is
1398  * reachable (e.g. NFS can unhash a directory dentry and then the complete
1399  * subtree can become unreachable).
1400  *
1401  * Only one of d_invalidate() and d_set_mounted() must succeed.  For
1402  * this reason take rename_lock and d_lock on dentry and ancestors.
1403  */
1404 int d_set_mounted(struct dentry *dentry)
1405 {
1406         struct dentry *p;
1407         int ret = -ENOENT;
1408         write_seqlock(&rename_lock);
1409         for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
1410                 /* Need exclusion wrt. d_invalidate() */
1411                 spin_lock(&p->d_lock);
1412                 if (unlikely(d_unhashed(p))) {
1413                         spin_unlock(&p->d_lock);
1414                         goto out;
1415                 }
1416                 spin_unlock(&p->d_lock);
1417         }
1418         spin_lock(&dentry->d_lock);
1419         if (!d_unlinked(dentry)) {
1420                 ret = -EBUSY;
1421                 if (!d_mountpoint(dentry)) {
1422                         dentry->d_flags |= DCACHE_MOUNTED;
1423                         ret = 0;
1424                 }
1425         }
1426         spin_unlock(&dentry->d_lock);
1427 out:
1428         write_sequnlock(&rename_lock);
1429         return ret;
1430 }
1431
1432 /*
1433  * Search the dentry child list of the specified parent,
1434  * and move any unused dentries to the end of the unused
1435  * list for prune_dcache(). We descend to the next level
1436  * whenever the d_subdirs list is non-empty and continue
1437  * searching.
1438  *
1439  * It returns zero iff there are no unused children,
1440  * otherwise it returns the number of children moved to
1441  * the end of the unused list. This may not be the total
1442  * number of unused children, because select_parent can
1443  * drop the lock and return early due to latency
1444  * constraints.
1445  */
1446
1447 struct select_data {
1448         struct dentry *start;
1449         struct list_head dispose;
1450         int found;
1451 };
1452
1453 static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1454 {
1455         struct select_data *data = _data;
1456         enum d_walk_ret ret = D_WALK_CONTINUE;
1457
1458         if (data->start == dentry)
1459                 goto out;
1460
1461         if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1462                 data->found++;
1463         } else {
1464                 if (dentry->d_flags & DCACHE_LRU_LIST)
1465                         d_lru_del(dentry);
1466                 if (!dentry->d_lockref.count) {
1467                         d_shrink_add(dentry, &data->dispose);
1468                         data->found++;
1469                 }
1470         }
1471         /*
1472          * We can return to the caller if we have found some (this
1473          * ensures forward progress). We'll be coming back to find
1474          * the rest.
1475          */
1476         if (!list_empty(&data->dispose))
1477                 ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1478 out:
1479         return ret;
1480 }
1481
1482 /**
1483  * shrink_dcache_parent - prune dcache
1484  * @parent: parent of entries to prune
1485  *
1486  * Prune the dcache to remove unused children of the parent dentry.
1487  */
1488 void shrink_dcache_parent(struct dentry *parent)
1489 {
1490         for (;;) {
1491                 struct select_data data;
1492
1493                 INIT_LIST_HEAD(&data.dispose);
1494                 data.start = parent;
1495                 data.found = 0;
1496
1497                 d_walk(parent, &data, select_collect);
1498
1499                 if (!list_empty(&data.dispose)) {
1500                         shrink_dentry_list(&data.dispose);
1501                         continue;
1502                 }
1503
1504                 cond_resched();
1505                 if (!data.found)
1506                         break;
1507         }
1508 }
1509 EXPORT_SYMBOL(shrink_dcache_parent);
1510
1511 static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
1512 {
1513         /* it has busy descendants; complain about those instead */
1514         if (!list_empty(&dentry->d_subdirs))
1515                 return D_WALK_CONTINUE;
1516
1517         /* root with refcount 1 is fine */
1518         if (dentry == _data && dentry->d_lockref.count == 1)
1519                 return D_WALK_CONTINUE;
1520
1521         printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
1522                         " still in use (%d) [unmount of %s %s]\n",
1523                        dentry,
1524                        dentry->d_inode ?
1525                        dentry->d_inode->i_ino : 0UL,
1526                        dentry,
1527                        dentry->d_lockref.count,
1528                        dentry->d_sb->s_type->name,
1529                        dentry->d_sb->s_id);
1530         WARN_ON(1);
1531         return D_WALK_CONTINUE;
1532 }
1533
1534 static void do_one_tree(struct dentry *dentry)
1535 {
1536         shrink_dcache_parent(dentry);
1537         d_walk(dentry, dentry, umount_check);
1538         d_drop(dentry);
1539         dput(dentry);
1540 }
1541
1542 /*
1543  * destroy the dentries attached to a superblock on unmounting
1544  */
1545 void shrink_dcache_for_umount(struct super_block *sb)
1546 {
1547         struct dentry *dentry;
1548
1549         WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
1550
1551         dentry = sb->s_root;
1552         sb->s_root = NULL;
1553         do_one_tree(dentry);
1554
1555         while (!hlist_bl_empty(&sb->s_roots)) {
1556                 dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_roots), struct dentry, d_hash));
1557                 do_one_tree(dentry);
1558         }
1559 }
1560
1561 static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
1562 {
1563         struct dentry **victim = _data;
1564         if (d_mountpoint(dentry)) {
1565                 __dget_dlock(dentry);
1566                 *victim = dentry;
1567                 return D_WALK_QUIT;
1568         }
1569         return D_WALK_CONTINUE;
1570 }
1571
1572 /**
1573  * d_invalidate - detach submounts, prune dcache, and drop
1574  * @dentry: dentry to invalidate (aka detach, prune and drop)
1575  */
1576 void d_invalidate(struct dentry *dentry)
1577 {
1578         bool had_submounts = false;
1579         spin_lock(&dentry->d_lock);
1580         if (d_unhashed(dentry)) {
1581                 spin_unlock(&dentry->d_lock);
1582                 return;
1583         }
1584         __d_drop(dentry);
1585         spin_unlock(&dentry->d_lock);
1586
1587         /* Negative dentries can be dropped without further checks */
1588         if (!dentry->d_inode)
1589                 return;
1590
1591         shrink_dcache_parent(dentry);
1592         for (;;) {
1593                 struct dentry *victim = NULL;
1594                 d_walk(dentry, &victim, find_submount);
1595                 if (!victim) {
1596                         if (had_submounts)
1597                                 shrink_dcache_parent(dentry);
1598                         return;
1599                 }
1600                 had_submounts = true;
1601                 detach_mounts(victim);
1602                 dput(victim);
1603         }
1604 }
1605 EXPORT_SYMBOL(d_invalidate);
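
/*
 * Illustrative sketch (not part of dcache.c; the helper and its caller
 * are hypothetical): a network filesystem told by its server that a
 * name disappeared could look up the cached dentry and invalidate it,
 * which also detaches any mounts sitting on top of it.
 */
static void foo_name_removed_remotely(struct dentry *parent, struct qstr *name)
{
        struct dentry *victim = d_hash_and_lookup(parent, name);

        if (!IS_ERR_OR_NULL(victim)) {
                d_invalidate(victim);
                dput(victim);
        }
}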
1606
1607 /**
1608  * __d_alloc    -       allocate a dcache entry
1609  * @sb: filesystem it will belong to
1610  * @name: qstr of the name
1611  *
1612  * Allocates a dentry. It returns %NULL if there is insufficient memory
1613  * available. On a success the dentry is returned. The name passed in is
1614  * copied and the copy passed in may be reused after this call.
1615  */
1616  
1617 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1618 {
1619         struct dentry *dentry;
1620         char *dname;
1621         int err;
1622
1623         dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1624         if (!dentry)
1625                 return NULL;
1626
1627         /*
1628          * We guarantee that the inline name is always NUL-terminated.
1629          * This way the memcpy() done by the name switching in rename
1630          * will still always have a NUL at the end, even if we might
1631          * be overwriting an internal NUL character
1632          */
1633         dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1634         if (unlikely(!name)) {
1635                 name = &slash_name;
1636                 dname = dentry->d_iname;
1637         } else if (name->len > DNAME_INLINE_LEN-1) {
1638                 size_t size = offsetof(struct external_name, name[1]);
1639                 struct external_name *p = kmalloc(size + name->len,
1640                                                   GFP_KERNEL_ACCOUNT |
1641                                                   __GFP_RECLAIMABLE);
1642                 if (!p) {
1643                         kmem_cache_free(dentry_cache, dentry); 
1644                         return NULL;
1645                 }
1646                 atomic_set(&p->u.count, 1);
1647                 dname = p->name;
1648         } else  {
1649                 dname = dentry->d_iname;
1650         }       
1651
1652         dentry->d_name.len = name->len;
1653         dentry->d_name.hash = name->hash;
1654         memcpy(dname, name->name, name->len);
1655         dname[name->len] = 0;
1656
1657         /* Make sure we always see the terminating NUL character */
1658         smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
1659
1660         dentry->d_lockref.count = 1;
1661         dentry->d_flags = 0;
1662         spin_lock_init(&dentry->d_lock);
1663         seqcount_init(&dentry->d_seq);
1664         dentry->d_inode = NULL;
1665         dentry->d_parent = dentry;
1666         dentry->d_sb = sb;
1667         dentry->d_op = NULL;
1668         dentry->d_fsdata = NULL;
1669         INIT_HLIST_BL_NODE(&dentry->d_hash);
1670         INIT_LIST_HEAD(&dentry->d_lru);
1671         INIT_LIST_HEAD(&dentry->d_subdirs);
1672         INIT_HLIST_NODE(&dentry->d_u.d_alias);
1673         INIT_LIST_HEAD(&dentry->d_child);
1674         d_set_d_op(dentry, dentry->d_sb->s_d_op);
1675
1676         if (dentry->d_op && dentry->d_op->d_init) {
1677                 err = dentry->d_op->d_init(dentry);
1678                 if (err) {
1679                         if (dname_external(dentry))
1680                                 kfree(external_name(dentry));
1681                         kmem_cache_free(dentry_cache, dentry);
1682                         return NULL;
1683                 }
1684         }
1685
1686         this_cpu_inc(nr_dentry);
1687
1688         return dentry;
1689 }
1690
1691 /**
1692  * d_alloc      -       allocate a dcache entry
1693  * @parent: parent of entry to allocate
1694  * @name: qstr of the name
1695  *
1696  * Allocates a dentry. It returns %NULL if there is insufficient memory
1697  * available. On a success the dentry is returned. The name passed in is
1698  * copied and the copy passed in may be reused after this call.
1699  */
1700 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1701 {
1702         struct dentry *dentry = __d_alloc(parent->d_sb, name);
1703         if (!dentry)
1704                 return NULL;
1705         dentry->d_flags |= DCACHE_RCUACCESS;
1706         spin_lock(&parent->d_lock);
1707         /*
1708          * don't need child lock because it is not subject
1709          * to concurrency here
1710          */
1711         __dget_dlock(parent);
1712         dentry->d_parent = parent;
1713         list_add(&dentry->d_child, &parent->d_subdirs);
1714         spin_unlock(&parent->d_lock);
1715
1716         return dentry;
1717 }
1718 EXPORT_SYMBOL(d_alloc);
1719
1720 struct dentry *d_alloc_anon(struct super_block *sb)
1721 {
1722         return __d_alloc(sb, NULL);
1723 }
1724 EXPORT_SYMBOL(d_alloc_anon);
1725
1726 struct dentry *d_alloc_cursor(struct dentry * parent)
1727 {
1728         struct dentry *dentry = d_alloc_anon(parent->d_sb);
1729         if (dentry) {
1730                 dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
1731                 dentry->d_parent = dget(parent);
1732         }
1733         return dentry;
1734 }
1735
1736 /**
1737  * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
1738  * @sb: the superblock
1739  * @name: qstr of the name
1740  *
1741  * For a filesystem that just pins its dentries in memory and never
1742  * performs lookups at all, return an unhashed IS_ROOT dentry.
1743  */
1744 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1745 {
1746         return __d_alloc(sb, name);
1747 }
1748 EXPORT_SYMBOL(d_alloc_pseudo);
1749
1750 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1751 {
1752         struct qstr q;
1753
1754         q.name = name;
1755         q.hash_len = hashlen_string(parent, name);
1756         return d_alloc(parent, &q);
1757 }
1758 EXPORT_SYMBOL(d_alloc_name);
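
/*
 * Illustrative sketch (not part of dcache.c; foo_make_inode() is a
 * hypothetical helper): pseudo filesystems typically grow their tree by
 * allocating a named child with d_alloc_name() and then hashing it with
 * an inode attached, in the spirit of what debugfs-like code does.
 */
extern struct inode *foo_make_inode(struct super_block *sb, umode_t mode);

static struct dentry *foo_create_node(struct dentry *parent,
                                      const char *name, umode_t mode)
{
        struct dentry *dentry = d_alloc_name(parent, name);
        struct inode *inode;

        if (!dentry)
                return ERR_PTR(-ENOMEM);

        inode = foo_make_inode(parent->d_sb, mode);
        if (!inode) {
                dput(dentry);
                return ERR_PTR(-ENOMEM);
        }

        d_add(dentry, inode);   /* attach the inode and hash the dentry */
        return dentry;
}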
1759
1760 void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1761 {
1762         WARN_ON_ONCE(dentry->d_op);
1763         WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1764                                 DCACHE_OP_COMPARE       |
1765                                 DCACHE_OP_REVALIDATE    |
1766                                 DCACHE_OP_WEAK_REVALIDATE       |
1767                                 DCACHE_OP_DELETE        |
1768                                 DCACHE_OP_REAL));
1769         dentry->d_op = op;
1770         if (!op)
1771                 return;
1772         if (op->d_hash)
1773                 dentry->d_flags |= DCACHE_OP_HASH;
1774         if (op->d_compare)
1775                 dentry->d_flags |= DCACHE_OP_COMPARE;
1776         if (op->d_revalidate)
1777                 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1778         if (op->d_weak_revalidate)
1779                 dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1780         if (op->d_delete)
1781                 dentry->d_flags |= DCACHE_OP_DELETE;
1782         if (op->d_prune)
1783                 dentry->d_flags |= DCACHE_OP_PRUNE;
1784         if (op->d_real)
1785                 dentry->d_flags |= DCACHE_OP_REAL;
1786
1787 }
1788 EXPORT_SYMBOL(d_set_d_op);
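
/*
 * Illustrative sketch (not part of dcache.c; the foo_* names are
 * hypothetical): rather than calling d_set_d_op() on each dentry, most
 * filesystems set sb->s_d_op once at mount time and let __d_alloc()
 * apply it to every dentry of that superblock.
 */
static int foo_d_revalidate(struct dentry *dentry, unsigned int flags)
{
        return 1;       /* toy example: dentries never go stale */
}

static const struct dentry_operations foo_dentry_ops = {
        .d_revalidate   = foo_d_revalidate,
};

/* in a fill_super() implementation:  sb->s_d_op = &foo_dentry_ops;  */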
1789
1790
1791 /**
1792  * d_set_fallthru - Mark a dentry as falling through to a lower layer
1793  * @dentry: The dentry to mark
1794  *
1795  * Mark a dentry as falling through to the lower layer (as set with
1796  * d_pin_lower()).  This flag may be recorded on the medium.
1797  */
1798 void d_set_fallthru(struct dentry *dentry)
1799 {
1800         spin_lock(&dentry->d_lock);
1801         dentry->d_flags |= DCACHE_FALLTHRU;
1802         spin_unlock(&dentry->d_lock);
1803 }
1804 EXPORT_SYMBOL(d_set_fallthru);
1805
1806 static unsigned d_flags_for_inode(struct inode *inode)
1807 {
1808         unsigned add_flags = DCACHE_REGULAR_TYPE;
1809
1810         if (!inode)
1811                 return DCACHE_MISS_TYPE;
1812
1813         if (S_ISDIR(inode->i_mode)) {
1814                 add_flags = DCACHE_DIRECTORY_TYPE;
1815                 if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
1816                         if (unlikely(!inode->i_op->lookup))
1817                                 add_flags = DCACHE_AUTODIR_TYPE;
1818                         else
1819                                 inode->i_opflags |= IOP_LOOKUP;
1820                 }
1821                 goto type_determined;
1822         }
1823
1824         if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1825                 if (unlikely(inode->i_op->get_link)) {
1826                         add_flags = DCACHE_SYMLINK_TYPE;
1827                         goto type_determined;
1828                 }
1829                 inode->i_opflags |= IOP_NOFOLLOW;
1830         }
1831
1832         if (unlikely(!S_ISREG(inode->i_mode)))
1833                 add_flags = DCACHE_SPECIAL_TYPE;
1834
1835 type_determined:
1836         if (unlikely(IS_AUTOMOUNT(inode)))
1837                 add_flags |= DCACHE_NEED_AUTOMOUNT;
1838         return add_flags;
1839 }
1840
1841 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1842 {
1843         unsigned add_flags = d_flags_for_inode(inode);
1844         WARN_ON(d_in_lookup(dentry));
1845
1846         spin_lock(&dentry->d_lock);
1847         /*
1848          * Decrement negative dentry count if it was in the LRU list.
1849          */
1850         if (dentry->d_flags & DCACHE_LRU_LIST)
1851                 this_cpu_dec(nr_dentry_negative);
1852         hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1853         raw_write_seqcount_begin(&dentry->d_seq);
1854         __d_set_inode_and_type(dentry, inode, add_flags);
1855         raw_write_seqcount_end(&dentry->d_seq);
1856         fsnotify_update_flags(dentry);
1857         spin_unlock(&dentry->d_lock);
1858 }
1859
1860 /**
1861  * d_instantiate - fill in inode information for a dentry
1862  * @entry: dentry to complete
1863  * @inode: inode to attach to this dentry
1864  *
1865  * Fill in inode information in the entry.
1866  *
1867  * This turns negative dentries into productive full members
1868  * of society.
1869  *
1870  * NOTE! This assumes that the inode count has been incremented
1871  * (or otherwise set) by the caller to indicate that it is now
1872  * in use by the dcache.
1873  */
1874  
1875 void d_instantiate(struct dentry *entry, struct inode * inode)
1876 {
1877         BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
1878         if (inode) {
1879                 security_d_instantiate(entry, inode);
1880                 spin_lock(&inode->i_lock);
1881                 __d_instantiate(entry, inode);
1882                 spin_unlock(&inode->i_lock);
1883         }
1884 }
1885 EXPORT_SYMBOL(d_instantiate);
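
/*
 * Illustrative sketch (not part of dcache.c; foo_new_inode() is a
 * hypothetical helper that returns an inode whose reference is already
 * counted for the dcache): a minimal ->create() method usually ends by
 * binding the new inode to the negative dentry it was handed.
 */
extern struct inode *foo_new_inode(struct inode *dir, umode_t mode);

static int foo_create(struct inode *dir, struct dentry *dentry,
                      umode_t mode, bool excl)
{
        struct inode *inode = foo_new_inode(dir, mode);

        if (!inode)
                return -ENOSPC;

        d_instantiate(dentry, inode);   /* the dentry now owns that reference */
        return 0;
}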
1886
1887 /*
1888  * This should be equivalent to d_instantiate() + unlock_new_inode(),
1889  * with lockdep-related part of unlock_new_inode() done before
1890  * anything else.  Use that instead of open-coding d_instantiate()/
1891  * unlock_new_inode() combinations.
1892  */
1893 void d_instantiate_new(struct dentry *entry, struct inode *inode)
1894 {
1895         BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
1896         BUG_ON(!inode);
1897         lockdep_annotate_inode_mutex_key(inode);
1898         security_d_instantiate(entry, inode);
1899         spin_lock(&inode->i_lock);
1900         __d_instantiate(entry, inode);
1901         WARN_ON(!(inode->i_state & I_NEW));
1902         inode->i_state &= ~I_NEW & ~I_CREATING;
1903         smp_mb();
1904         wake_up_bit(&inode->i_state, __I_NEW);
1905         spin_unlock(&inode->i_lock);
1906 }
1907 EXPORT_SYMBOL(d_instantiate_new);
1908
1909 struct dentry *d_make_root(struct inode *root_inode)
1910 {
1911         struct dentry *res = NULL;
1912
1913         if (root_inode) {
1914                 res = d_alloc_anon(root_inode->i_sb);
1915                 if (res) {
1916                         res->d_flags |= DCACHE_RCUACCESS;
1917                         d_instantiate(res, root_inode);
1918                 } else {
1919                         iput(root_inode);
1920                 }
1921         }
1922         return res;
1923 }
1924 EXPORT_SYMBOL(d_make_root);
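
/*
 * Illustrative sketch (not part of dcache.c; foo_get_root_inode() is a
 * hypothetical helper): the tail of a typical fill_super() hands its
 * root inode to d_make_root().  On failure the inode reference has
 * already been dropped for us, so only -ENOMEM needs reporting.
 */
extern struct inode *foo_get_root_inode(struct super_block *sb);

static int foo_fill_super(struct super_block *sb, void *data, int silent)
{
        sb->s_root = d_make_root(foo_get_root_inode(sb));
        if (!sb->s_root)
                return -ENOMEM;
        return 0;
}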
1925
1926 static struct dentry *__d_instantiate_anon(struct dentry *dentry,
1927                                            struct inode *inode,
1928                                            bool disconnected)
1929 {
1930         struct dentry *res;
1931         unsigned add_flags;
1932
1933         security_d_instantiate(dentry, inode);
1934         spin_lock(&inode->i_lock);
1935         res = __d_find_any_alias(inode);
1936         if (res) {
1937                 spin_unlock(&inode->i_lock);
1938                 dput(dentry);
1939                 goto out_iput;
1940         }
1941
1942         /* attach a disconnected dentry */
1943         add_flags = d_flags_for_inode(inode);
1944
1945         if (disconnected)
1946                 add_flags |= DCACHE_DISCONNECTED;
1947
1948         spin_lock(&dentry->d_lock);
1949         __d_set_inode_and_type(dentry, inode, add_flags);
1950         hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1951         if (!disconnected) {
1952                 hlist_bl_lock(&dentry->d_sb->s_roots);
1953                 hlist_bl_add_head(&dentry->d_hash, &dentry->d_sb->s_roots);
1954                 hlist_bl_unlock(&dentry->d_sb->s_roots);
1955         }
1956         spin_unlock(&dentry->d_lock);
1957         spin_unlock(&inode->i_lock);
1958
1959         return dentry;
1960
1961  out_iput:
1962         iput(inode);
1963         return res;
1964 }
1965
1966 struct dentry *d_instantiate_anon(struct dentry *dentry, struct inode *inode)
1967 {
1968         return __d_instantiate_anon(dentry, inode, true);
1969 }
1970 EXPORT_SYMBOL(d_instantiate_anon);
1971
1972 static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected)
1973 {
1974         struct dentry *tmp;
1975         struct dentry *res;
1976
1977         if (!inode)
1978                 return ERR_PTR(-ESTALE);
1979         if (IS_ERR(inode))
1980                 return ERR_CAST(inode);
1981
1982         res = d_find_any_alias(inode);
1983         if (res)
1984                 goto out_iput;
1985
1986         tmp = d_alloc_anon(inode->i_sb);
1987         if (!tmp) {
1988                 res = ERR_PTR(-ENOMEM);
1989                 goto out_iput;
1990         }
1991
1992         return __d_instantiate_anon(tmp, inode, disconnected);
1993
1994 out_iput:
1995         iput(inode);
1996         return res;
1997 }
1998
1999 /**
2000  * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
2001  * @inode: inode to allocate the dentry for
2002  *
2003  * Obtain a dentry for an inode resulting from NFS filehandle conversion or
2004  * similar open by handle operations.  The returned dentry may be anonymous,
2005  * or may have a full name (if the inode was already in the cache).
2006  *
2007  * When called on a directory inode, we must ensure that the inode only ever
2008  * has one dentry.  If a dentry is found, that is returned instead of
2009  * allocating a new one.
2010  *
2011  * On successful return, the reference to the inode has been transferred
2012  * to the dentry.  In case of an error the reference on the inode is released.
2013  * To make it easier to use in export operations a %NULL or IS_ERR inode may
2014  * be passed in and the error will be propagated to the return value,
2015  * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
2016  */
2017 struct dentry *d_obtain_alias(struct inode *inode)
2018 {
2019         return __d_obtain_alias(inode, true);
2020 }
2021 EXPORT_SYMBOL(d_obtain_alias);
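
/*
 * Illustrative sketch (not part of dcache.c; foo_ilookup() is a
 * hypothetical helper and the wrapper below is simplified, not the real
 * export_operations signature): the guts of an ->fh_to_dentry()
 * implementation typically reduce to "find the inode, then let
 * d_obtain_alias() find or create a dentry for it".
 */
extern struct inode *foo_ilookup(struct super_block *sb, u64 ino, u32 gen);

static struct dentry *foo_dentry_from_handle(struct super_block *sb,
                                             u64 ino, u32 gen)
{
        /* NULL and ERR_PTR inodes become ERR_PTR dentries automatically */
        return d_obtain_alias(foo_ilookup(sb, ino, gen));
}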
2022
2023 /**
2024  * d_obtain_root - find or allocate a dentry for a given inode
2025  * @inode: inode to allocate the dentry for
2026  *
2027  * Obtain an IS_ROOT dentry for the root of a filesystem.
2028  *
2029  * We must ensure that directory inodes only ever have one dentry.  If a
2030  * dentry is found, that is returned instead of allocating a new one.
2031  *
2032  * On successful return, the reference to the inode has been transferred
2033  * to the dentry.  In case of an error the reference on the inode is
2034  * released.  A %NULL or IS_ERR inode may be passed in and the error
2035  * will be propagated to the return value, with a %NULL @inode
2036  * replaced by ERR_PTR(-ESTALE).
2037  */
2038 struct dentry *d_obtain_root(struct inode *inode)
2039 {
2040         return __d_obtain_alias(inode, false);
2041 }
2042 EXPORT_SYMBOL(d_obtain_root);
2043
2044 /**
2045  * d_add_ci - lookup or allocate new dentry with case-exact name
2046  * @inode:  the inode case-insensitive lookup has found
2047  * @dentry: the negative dentry that was passed to the parent's lookup func
2048  * @name:   the case-exact name to be associated with the returned dentry
2049  *
2050  * This is to avoid filling the dcache with case-insensitive names to the
2051  * same inode, only the actual correct case is stored in the dcache for
2052  * case-insensitive filesystems.
2053  *
2054  * For a case-insensitive lookup match, if the case-exact dentry
2055  * already exists in the dcache, use it and return it.
2056  *
2057  * If no entry exists with the exact case name, allocate new dentry with
2058  * the exact case, and return the spliced entry.
2059  */
2060 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
2061                         struct qstr *name)
2062 {
2063         struct dentry *found, *res;
2064
2065         /*
2066          * First check if a dentry matching the name already exists,
2067          * if not go ahead and create it now.
2068          */
2069         found = d_hash_and_lookup(dentry->d_parent, name);
2070         if (found) {
2071                 iput(inode);
2072                 return found;
2073         }
2074         if (d_in_lookup(dentry)) {
2075                 found = d_alloc_parallel(dentry->d_parent, name,
2076                                         dentry->d_wait);
2077                 if (IS_ERR(found) || !d_in_lookup(found)) {
2078                         iput(inode);
2079                         return found;
2080                 }
2081         } else {
2082                 found = d_alloc(dentry->d_parent, name);
2083                 if (!found) {
2084                         iput(inode);
2085                         return ERR_PTR(-ENOMEM);
2086                 } 
2087         }
2088         res = d_splice_alias(inode, found);
2089         if (res) {
2090                 dput(found);
2091                 return res;
2092         }
2093         return found;
2094 }
2095 EXPORT_SYMBOL(d_add_ci);
2096
2097
2098 static inline bool d_same_name(const struct dentry *dentry,
2099                                 const struct dentry *parent,
2100                                 const struct qstr *name)
2101 {
2102         if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) {
2103                 if (dentry->d_name.len != name->len)
2104                         return false;
2105                 return dentry_cmp(dentry, name->name, name->len) == 0;
2106         }
2107         return parent->d_op->d_compare(dentry,
2108                                        dentry->d_name.len, dentry->d_name.name,
2109                                        name) == 0;
2110 }
2111
2112 /**
2113  * __d_lookup_rcu - search for a dentry (racy, store-free)
2114  * @parent: parent dentry
2115  * @name: qstr of name we wish to find
2116  * @seqp: returns d_seq value at the point where the dentry was found
2117  * Returns: dentry, or NULL
2118  *
2119  * __d_lookup_rcu is the dcache lookup function for rcu-walk name
2120  * resolution (store-free path walking) design described in
2121  * Documentation/filesystems/path-lookup.txt.
2122  *
2123  * This is not to be used outside core vfs.
2124  *
2125  * __d_lookup_rcu must only be used in rcu-walk mode, i.e. with vfsmount lock
2126  * held, and rcu_read_lock held. The returned dentry must not be used
2127  * without taking d_lock and checking its d_seq sequence count against the
2128  * @seq value returned here.
2129  *
2130  * A refcount may be taken on the found dentry with the d_rcu_to_refcount
2131  * function.
2132  *
2133  * Alternatively, __d_lookup_rcu may be called again to look up the child of
2134  * the returned dentry, so long as its parent's seqlock is checked after the
2135  * child is looked up. Thus, an interlocking stepping of sequence lock checks
2136  * is formed, giving integrity down the path walk.
2137  *
2138  * NOTE! The caller *has* to check the resulting dentry against the sequence
2139  * number we've returned before using any of the resulting dentry state!
2140  */
2141 struct dentry *__d_lookup_rcu(const struct dentry *parent,
2142                                 const struct qstr *name,
2143                                 unsigned *seqp)
2144 {
2145         u64 hashlen = name->hash_len;
2146         const unsigned char *str = name->name;
2147         struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
2148         struct hlist_bl_node *node;
2149         struct dentry *dentry;
2150
2151         /*
2152          * Note: There is significant duplication with __d_lookup which is
2153          * required to prevent single threaded performance regressions
2154          * especially on architectures where smp_rmb (in seqcounts) are costly.
2155          * Keep the two functions in sync.
2156          */
2157
2158         /*
2159          * The hash list is protected using RCU.
2160          *
2161          * Carefully use d_seq when comparing a candidate dentry, to avoid
2162          * races with d_move().
2163          *
2164          * It is possible that concurrent renames can mess up our list
2165          * walk here and result in missing our dentry, resulting in the
2166          * false-negative result. d_lookup() protects against concurrent
2167          * renames using rename_lock seqlock.
2168          *
2169          * See Documentation/filesystems/path-lookup.txt for more details.
2170          */
2171         hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2172                 unsigned seq;
2173
2174 seqretry:
2175                 /*
2176                  * The dentry sequence count protects us from concurrent
2177                  * renames, and thus protects parent and name fields.
2178                  *
2179                  * The caller must perform a seqcount check in order
2180                  * to do anything useful with the returned dentry.
2181                  *
2182                  * NOTE! We do a "raw" seqcount_begin here. That means that
2183                  * we don't wait for the sequence count to stabilize if it
2184                  * is in the middle of a sequence change. If we do the slow
2185                  * dentry compare, we will do seqretries until it is stable,
2186                  * and if we end up with a successful lookup, we actually
2187                  * want to exit RCU lookup anyway.
2188                  *
2189                  * Note that raw_seqcount_begin still *does* smp_rmb(), so
2190                  * we are still guaranteed NUL-termination of ->d_name.name.
2191                  */
2192                 seq = raw_seqcount_begin(&dentry->d_seq);
2193                 if (dentry->d_parent != parent)
2194                         continue;
2195                 if (d_unhashed(dentry))
2196                         continue;
2197
2198                 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
2199                         int tlen;
2200                         const char *tname;
2201                         if (dentry->d_name.hash != hashlen_hash(hashlen))
2202                                 continue;
2203                         tlen = dentry->d_name.len;
2204                         tname = dentry->d_name.name;
2205                         /* we want a consistent (name,len) pair */
2206                         if (read_seqcount_retry(&dentry->d_seq, seq)) {
2207                                 cpu_relax();
2208                                 goto seqretry;
2209                         }
2210                         if (parent->d_op->d_compare(dentry,
2211                                                     tlen, tname, name) != 0)
2212                                 continue;
2213                 } else {
2214                         if (dentry->d_name.hash_len != hashlen)
2215                                 continue;
2216                         if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
2217                                 continue;
2218                 }
2219                 *seqp = seq;
2220                 return dentry;
2221         }
2222         return NULL;
2223 }
2224
2225 /**
2226  * d_lookup - search for a dentry
2227  * @parent: parent dentry
2228  * @name: qstr of name we wish to find
2229  * Returns: dentry, or NULL
2230  *
2231  * d_lookup searches the children of the parent dentry for the name in
2232  * question. If the dentry is found its reference count is incremented and the
2233  * dentry is returned. The caller must use dput to free the entry when it has
2234  * finished using it. %NULL is returned if the dentry does not exist.
2235  */
2236 struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
2237 {
2238         struct dentry *dentry;
2239         unsigned seq;
2240
2241         do {
2242                 seq = read_seqbegin(&rename_lock);
2243                 dentry = __d_lookup(parent, name);
2244                 if (dentry)
2245                         break;
2246         } while (read_seqretry(&rename_lock, seq));
2247         return dentry;
2248 }
2249 EXPORT_SYMBOL(d_lookup);
2250
2251 /**
2252  * __d_lookup - search for a dentry (racy)
2253  * @parent: parent dentry
2254  * @name: qstr of name we wish to find
2255  * Returns: dentry, or NULL
2256  *
2257  * __d_lookup is like d_lookup, however it may (rarely) return a
2258  * false-negative result due to unrelated rename activity.
2259  *
2260  * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
2261  * however it must be used carefully, eg. with a following d_lookup in
2262  * the case of failure.
2263  *
2264  * __d_lookup callers must be commented.
2265  */
2266 struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
2267 {
2268         unsigned int hash = name->hash;
2269         struct hlist_bl_head *b = d_hash(hash);
2270         struct hlist_bl_node *node;
2271         struct dentry *found = NULL;
2272         struct dentry *dentry;
2273
2274         /*
2275          * Note: There is significant duplication with __d_lookup_rcu which is
2276          * required to prevent single threaded performance regressions
2277          * especially on architectures where smp_rmb (in seqcounts) are costly.
2278          * Keep the two functions in sync.
2279          */
2280
2281         /*
2282          * The hash list is protected using RCU.
2283          *
2284          * Take d_lock when comparing a candidate dentry, to avoid races
2285          * with d_move().
2286          *
2287          * It is possible that concurrent renames can mess up our list
2288          * walk here and result in missing our dentry, resulting in the
2289          * false-negative result. d_lookup() protects against concurrent
2290          * renames using rename_lock seqlock.
2291          *
2292          * See Documentation/filesystems/path-lookup.txt for more details.
2293          */
2294         rcu_read_lock();
2295         
2296         hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2297
2298                 if (dentry->d_name.hash != hash)
2299                         continue;
2300
2301                 spin_lock(&dentry->d_lock);
2302                 if (dentry->d_parent != parent)
2303                         goto next;
2304                 if (d_unhashed(dentry))
2305                         goto next;
2306
2307                 if (!d_same_name(dentry, parent, name))
2308                         goto next;
2309
2310                 dentry->d_lockref.count++;
2311                 found = dentry;
2312                 spin_unlock(&dentry->d_lock);
2313                 break;
2314 next:
2315                 spin_unlock(&dentry->d_lock);
2316         }
2317         rcu_read_unlock();
2318
2319         return found;
2320 }
2321
2322 /**
2323  * d_hash_and_lookup - hash the qstr then search for a dentry
2324  * @dir: Directory to search in
2325  * @name: qstr of name we wish to find
2326  *
2327  * On lookup failure NULL is returned; on a bad name ERR_PTR(-error) is returned.
2328  */
2329 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2330 {
2331         /*
2332          * Check for a fs-specific hash function. Note that we must
2333          * calculate the standard hash first, as the d_op->d_hash()
2334          * routine may choose to leave the hash value unchanged.
2335          */
2336         name->hash = full_name_hash(dir, name->name, name->len);
2337         if (dir->d_flags & DCACHE_OP_HASH) {
2338                 int err = dir->d_op->d_hash(dir, name);
2339                 if (unlikely(err < 0))
2340                         return ERR_PTR(err);
2341         }
2342         return d_lookup(dir, name);
2343 }
2344 EXPORT_SYMBOL(d_hash_and_lookup);
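
/*
 * Illustrative sketch (not part of dcache.c; the helper is
 * hypothetical): d_hash_and_lookup() is the convenient entry point when
 * starting from a raw name, since it computes the hash (and lets a
 * fs-specific ->d_hash() adjust it) before searching the cache.
 */
static bool foo_child_is_cached(struct dentry *parent, const char *name)
{
        struct qstr q = QSTR_INIT(name, strlen(name));
        struct dentry *child = d_hash_and_lookup(parent, &q);

        if (IS_ERR_OR_NULL(child))
                return false;
        dput(child);    /* the lookup returned a counted reference */
        return true;
}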
2345
2346 /*
2347  * When a file is deleted, we have two options:
2348  * - turn this dentry into a negative dentry
2349  * - unhash this dentry and free it.
2350  *
2351  * Usually, we want to just turn this into
2352  * a negative dentry, but if anybody else is
2353  * currently using the dentry or the inode
2354  * we can't do that and we fall back on removing
2355  * it from the hash queues and waiting for
2356  * it to be deleted later when it has no users
2357  */
2358  
2359 /**
2360  * d_delete - delete a dentry
2361  * @dentry: The dentry to delete
2362  *
2363  * Turn the dentry into a negative dentry if possible, otherwise
2364  * remove it from the hash queues so it can be deleted later
2365  */
2366  
2367 void d_delete(struct dentry * dentry)
2368 {
2369         struct inode *inode = dentry->d_inode;
2370         int isdir = d_is_dir(dentry);
2371
2372         spin_lock(&inode->i_lock);
2373         spin_lock(&dentry->d_lock);
2374         /*
2375          * Are we the only user?
2376          */
2377         if (dentry->d_lockref.count == 1) {
2378                 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2379                 dentry_unlink_inode(dentry);
2380         } else {
2381                 __d_drop(dentry);
2382                 spin_unlock(&dentry->d_lock);
2383                 spin_unlock(&inode->i_lock);
2384         }
2385         fsnotify_nameremove(dentry, isdir);
2386 }
2387 EXPORT_SYMBOL(d_delete);
2388
2389 static void __d_rehash(struct dentry *entry)
2390 {
2391         struct hlist_bl_head *b = d_hash(entry->d_name.hash);
2392
2393         hlist_bl_lock(b);
2394         hlist_bl_add_head_rcu(&entry->d_hash, b);
2395         hlist_bl_unlock(b);
2396 }
2397
2398 /**
2399  * d_rehash     - add an entry back to the hash
2400  * @entry: dentry to add to the hash
2401  *
2402  * Adds a dentry to the hash according to its name.
2403  */
2404  
2405 void d_rehash(struct dentry * entry)
2406 {
2407         spin_lock(&entry->d_lock);
2408         __d_rehash(entry);
2409         spin_unlock(&entry->d_lock);
2410 }
2411 EXPORT_SYMBOL(d_rehash);
2412
2413 static inline unsigned start_dir_add(struct inode *dir)
2414 {
2415
2416         for (;;) {
2417                 unsigned n = dir->i_dir_seq;
2418                 if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
2419                         return n;
2420                 cpu_relax();
2421         }
2422 }
2423
2424 static inline void end_dir_add(struct inode *dir, unsigned n)
2425 {
2426         smp_store_release(&dir->i_dir_seq, n + 2);
2427 }
2428
2429 static void d_wait_lookup(struct dentry *dentry)
2430 {
2431         if (d_in_lookup(dentry)) {
2432                 DECLARE_WAITQUEUE(wait, current);
2433                 add_wait_queue(dentry->d_wait, &wait);
2434                 do {
2435                         set_current_state(TASK_UNINTERRUPTIBLE);
2436                         spin_unlock(&dentry->d_lock);
2437                         schedule();
2438                         spin_lock(&dentry->d_lock);
2439                 } while (d_in_lookup(dentry));
2440         }
2441 }
2442
2443 struct dentry *d_alloc_parallel(struct dentry *parent,
2444                                 const struct qstr *name,
2445                                 wait_queue_head_t *wq)
2446 {
2447         unsigned int hash = name->hash;
2448         struct hlist_bl_head *b = in_lookup_hash(parent, hash);
2449         struct hlist_bl_node *node;
2450         struct dentry *new = d_alloc(parent, name);
2451         struct dentry *dentry;
2452         unsigned seq, r_seq, d_seq;
2453
2454         if (unlikely(!new))
2455                 return ERR_PTR(-ENOMEM);
2456
2457 retry:
2458         rcu_read_lock();
2459         seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
2460         r_seq = read_seqbegin(&rename_lock);
2461         dentry = __d_lookup_rcu(parent, name, &d_seq);
2462         if (unlikely(dentry)) {
2463                 if (!lockref_get_not_dead(&dentry->d_lockref)) {
2464                         rcu_read_unlock();
2465                         goto retry;
2466                 }
2467                 if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
2468                         rcu_read_unlock();
2469                         dput(dentry);
2470                         goto retry;
2471                 }
2472                 rcu_read_unlock();
2473                 dput(new);
2474                 return dentry;
2475         }
2476         if (unlikely(read_seqretry(&rename_lock, r_seq))) {
2477                 rcu_read_unlock();
2478                 goto retry;
2479         }
2480
2481         if (unlikely(seq & 1)) {
2482                 rcu_read_unlock();
2483                 goto retry;
2484         }
2485
2486         hlist_bl_lock(b);
2487         if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
2488                 hlist_bl_unlock(b);
2489                 rcu_read_unlock();
2490                 goto retry;
2491         }
2492         /*
2493          * No changes for the parent since the beginning of d_lookup().
2494          * Since all removals from the chain happen with hlist_bl_lock(),
2495          * any potential in-lookup matches are going to stay here until
2496          * we unlock the chain.  All fields are stable in everything
2497          * we encounter.
2498          */
2499         hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
2500                 if (dentry->d_name.hash != hash)
2501                         continue;
2502                 if (dentry->d_parent != parent)
2503                         continue;
2504                 if (!d_same_name(dentry, parent, name))
2505                         continue;
2506                 hlist_bl_unlock(b);
2507                 /* now we can try to grab a reference */
2508                 if (!lockref_get_not_dead(&dentry->d_lockref)) {
2509                         rcu_read_unlock();
2510                         goto retry;
2511                 }
2512
2513                 rcu_read_unlock();
2514                 /*
2515                  * somebody is likely to be still doing lookup for it;
2516                  * wait for them to finish
2517                  */
2518                 spin_lock(&dentry->d_lock);
2519                 d_wait_lookup(dentry);
2520                 /*
2521                  * it's not in-lookup anymore; in principle we should repeat
2522                  * everything from dcache lookup, but it's likely to be what
2523                  * d_lookup() would've found anyway.  If it is, just return it;
2524                  * otherwise we really have to repeat the whole thing.
2525                  */
2526                 if (unlikely(dentry->d_name.hash != hash))
2527                         goto mismatch;
2528                 if (unlikely(dentry->d_parent != parent))
2529                         goto mismatch;
2530                 if (unlikely(d_unhashed(dentry)))
2531                         goto mismatch;
2532                 if (unlikely(!d_same_name(dentry, parent, name)))
2533                         goto mismatch;
2534                 /* OK, it *is* a hashed match; return it */
2535                 spin_unlock(&dentry->d_lock);
2536                 dput(new);
2537                 return dentry;
2538         }
2539         rcu_read_unlock();
2540         /* we can't take ->d_lock here; it's OK, though. */
2541         new->d_flags |= DCACHE_PAR_LOOKUP;
2542         new->d_wait = wq;
2543         hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
2544         hlist_bl_unlock(b);
2545         return new;
2546 mismatch:
2547         spin_unlock(&dentry->d_lock);
2548         dput(dentry);
2549         goto retry;
2550 }
2551 EXPORT_SYMBOL(d_alloc_parallel);
2552
2553 void __d_lookup_done(struct dentry *dentry)
2554 {
2555         struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
2556                                                  dentry->d_name.hash);
2557         hlist_bl_lock(b);
2558         dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
2559         __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
2560         wake_up_all(dentry->d_wait);
2561         dentry->d_wait = NULL;
2562         hlist_bl_unlock(b);
2563         INIT_HLIST_NODE(&dentry->d_u.d_alias);
2564         INIT_LIST_HEAD(&dentry->d_lru);
2565 }
2566 EXPORT_SYMBOL(__d_lookup_done);
2567
2568 /* inode->i_lock held if inode is non-NULL */
2569
2570 static inline void __d_add(struct dentry *dentry, struct inode *inode)
2571 {
2572         struct inode *dir = NULL;
2573         unsigned n;
2574         spin_lock(&dentry->d_lock);
2575         if (unlikely(d_in_lookup(dentry))) {
2576                 dir = dentry->d_parent->d_inode;
2577                 n = start_dir_add(dir);
2578                 __d_lookup_done(dentry);
2579         }
2580         if (inode) {
2581                 unsigned add_flags = d_flags_for_inode(inode);
2582                 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2583                 raw_write_seqcount_begin(&dentry->d_seq);
2584                 __d_set_inode_and_type(dentry, inode, add_flags);
2585                 raw_write_seqcount_end(&dentry->d_seq);
2586                 fsnotify_update_flags(dentry);
2587         }
2588         __d_rehash(dentry);
2589         if (dir)
2590                 end_dir_add(dir, n);
2591         spin_unlock(&dentry->d_lock);
2592         if (inode)
2593                 spin_unlock(&inode->i_lock);
2594 }
2595
2596 /**
2597  * d_add - add dentry to hash queues
2598  * @entry: dentry to add
2599  * @inode: The inode to attach to this dentry
2600  *
2601  * This adds the entry to the hash queues and attaches @inode to the entry.
2602  * The entry was actually filled in earlier during d_alloc().
2603  */
2604
2605 void d_add(struct dentry *entry, struct inode *inode)
2606 {
2607         if (inode) {
2608                 security_d_instantiate(entry, inode);
2609                 spin_lock(&inode->i_lock);
2610         }
2611         __d_add(entry, inode);
2612 }
2613 EXPORT_SYMBOL(d_add);
2614
2615 /**
2616  * d_exact_alias - find and hash an exact unhashed alias
2617  * @entry: dentry to add
2618  * @inode: The inode to go with this dentry
2619  *
2620  * If an unhashed dentry with the same name/parent and desired
2621  * inode already exists, hash and return it.  Otherwise, return
2622  * NULL.
2623  *
2624  * Parent directory should be locked.
2625  */
2626 struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
2627 {
2628         struct dentry *alias;
2629         unsigned int hash = entry->d_name.hash;
2630
2631         spin_lock(&inode->i_lock);
2632         hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
2633                 /*
2634                  * Don't need alias->d_lock here, because aliases with
2635                  * d_parent == entry->d_parent are not subject to name or
2636                  * parent changes, because the parent inode i_mutex is held.
2637                  */
2638                 if (alias->d_name.hash != hash)
2639                         continue;
2640                 if (alias->d_parent != entry->d_parent)
2641                         continue;
2642                 if (!d_same_name(alias, entry->d_parent, &entry->d_name))
2643                         continue;
2644                 spin_lock(&alias->d_lock);
2645                 if (!d_unhashed(alias)) {
2646                         spin_unlock(&alias->d_lock);
2647                         alias = NULL;
2648                 } else {
2649                         __dget_dlock(alias);
2650                         __d_rehash(alias);
2651                         spin_unlock(&alias->d_lock);
2652                 }
2653                 spin_unlock(&inode->i_lock);
2654                 return alias;
2655         }
2656         spin_unlock(&inode->i_lock);
2657         return NULL;
2658 }
2659 EXPORT_SYMBOL(d_exact_alias);
2660
2661 static void swap_names(struct dentry *dentry, struct dentry *target)
2662 {
2663         if (unlikely(dname_external(target))) {
2664                 if (unlikely(dname_external(dentry))) {
2665                         /*
2666                          * Both external: swap the pointers
2667                          */
2668                         swap(target->d_name.name, dentry->d_name.name);
2669                 } else {
2670                         /*
2671                          * dentry:internal, target:external.  Steal target's
2672                          * storage and make target internal.
2673                          */
2674                         memcpy(target->d_iname, dentry->d_name.name,
2675                                         dentry->d_name.len + 1);
2676                         dentry->d_name.name = target->d_name.name;
2677                         target->d_name.name = target->d_iname;
2678                 }
2679         } else {
2680                 if (unlikely(dname_external(dentry))) {
2681                         /*
2682                          * dentry:external, target:internal.  Give dentry's
2683                          * storage to target and make dentry internal
2684                          */
2685                         memcpy(dentry->d_iname, target->d_name.name,
2686                                         target->d_name.len + 1);
2687                         target->d_name.name = dentry->d_name.name;
2688                         dentry->d_name.name = dentry->d_iname;
2689                 } else {
2690                         /*
2691                          * Both are internal.
2692                          */
2693                         unsigned int i;
2694                         BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
2695                         for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
2696                                 swap(((long *) &dentry->d_iname)[i],
2697                                      ((long *) &target->d_iname)[i]);
2698                         }
2699                 }
2700         }
2701         swap(dentry->d_name.hash_len, target->d_name.hash_len);
2702 }
2703
2704 static void copy_name(struct dentry *dentry, struct dentry *target)
2705 {
2706         struct external_name *old_name = NULL;
2707         if (unlikely(dname_external(dentry)))
2708                 old_name = external_name(dentry);
2709         if (unlikely(dname_external(target))) {
2710                 atomic_inc(&external_name(target)->u.count);
2711                 dentry->d_name = target->d_name;
2712         } else {
2713                 memcpy(dentry->d_iname, target->d_name.name,
2714                                 target->d_name.len + 1);
2715                 dentry->d_name.name = dentry->d_iname;
2716                 dentry->d_name.hash_len = target->d_name.hash_len;
2717         }
2718         if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
2719                 kfree_rcu(old_name, u.head);
2720 }
2721
2722 /*
2723  * __d_move - move a dentry
2724  * @dentry: entry to move
2725  * @target: new dentry
2726  * @exchange: exchange the two dentries
2727  *
2728  * Update the dcache to reflect the move of a file name. Negative
2729  * dcache entries should not be moved in this way. Caller must hold
2730  * rename_lock, the i_mutex of the source and target directories,
2731  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2732  */
2733 static void __d_move(struct dentry *dentry, struct dentry *target,
2734                      bool exchange)
2735 {
2736         struct dentry *old_parent, *p;
2737         struct inode *dir = NULL;
2738         unsigned n;
2739
2740         WARN_ON(!dentry->d_inode);
2741         if (WARN_ON(dentry == target))
2742                 return;
2743
2744         BUG_ON(d_ancestor(target, dentry));
2745         old_parent = dentry->d_parent;
2746         p = d_ancestor(old_parent, target);
2747         if (IS_ROOT(dentry)) {
2748                 BUG_ON(p);
2749                 spin_lock(&target->d_parent->d_lock);
2750         } else if (!p) {
2751                 /* target is not a descendant of dentry->d_parent */
2752                 spin_lock(&target->d_parent->d_lock);
2753                 spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
2754         } else {
2755                 BUG_ON(p == dentry);
2756                 spin_lock(&old_parent->d_lock);
2757                 if (p != target)
2758                         spin_lock_nested(&target->d_parent->d_lock,
2759                                         DENTRY_D_LOCK_NESTED);
2760         }
2761         spin_lock_nested(&dentry->d_lock, 2);
2762         spin_lock_nested(&target->d_lock, 3);
2763
2764         if (unlikely(d_in_lookup(target))) {
2765                 dir = target->d_parent->d_inode;
2766                 n = start_dir_add(dir);
2767                 __d_lookup_done(target);
2768         }
2769
2770         write_seqcount_begin(&dentry->d_seq);
2771         write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2772
2773         /* unhash both */
2774         if (!d_unhashed(dentry))
2775                 ___d_drop(dentry);
2776         if (!d_unhashed(target))
2777                 ___d_drop(target);
2778
2779         /* ... and switch them in the tree */
2780         dentry->d_parent = target->d_parent;
2781         if (!exchange) {
2782                 copy_name(dentry, target);
2783                 target->d_hash.pprev = NULL;
2784                 dentry->d_parent->d_lockref.count++;
2785                 if (dentry == old_parent)
2786                         dentry->d_flags |= DCACHE_RCUACCESS;
2787                 else
2788                         WARN_ON(!--old_parent->d_lockref.count);
2789         } else {
2790                 target->d_parent = old_parent;
2791                 swap_names(dentry, target);
2792                 list_move(&target->d_child, &target->d_parent->d_subdirs);
2793                 __d_rehash(target);
2794                 fsnotify_update_flags(target);
2795         }
2796         list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
2797         __d_rehash(dentry);
2798         fsnotify_update_flags(dentry);
2799         fscrypt_handle_d_move(dentry);
2800
2801         write_seqcount_end(&target->d_seq);
2802         write_seqcount_end(&dentry->d_seq);
2803
2804         if (dir)
2805                 end_dir_add(dir, n);
2806
2807         if (dentry->d_parent != old_parent)
2808                 spin_unlock(&dentry->d_parent->d_lock);
2809         if (dentry != old_parent)
2810                 spin_unlock(&old_parent->d_lock);
2811         spin_unlock(&target->d_lock);
2812         spin_unlock(&dentry->d_lock);
2813 }
2814
2815 /*
2816  * d_move - move a dentry
2817  * @dentry: entry to move
2818  * @target: new dentry
2819  *
2820  * Update the dcache to reflect the move of a file name. Negative
2821  * dcache entries should not be moved in this way. See the locking
2822  * requirements for __d_move.
2823  */
2824 void d_move(struct dentry *dentry, struct dentry *target)
2825 {
2826         write_seqlock(&rename_lock);
2827         __d_move(dentry, target, false);
2828         write_sequnlock(&rename_lock);
2829 }
2830 EXPORT_SYMBOL(d_move);
2831
2832 /*
2833  * d_exchange - exchange two dentries
2834  * @dentry1: first dentry
2835  * @dentry2: second dentry
2836  */
2837 void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
2838 {
2839         write_seqlock(&rename_lock);
2840
2841         WARN_ON(!dentry1->d_inode);
2842         WARN_ON(!dentry2->d_inode);
2843         WARN_ON(IS_ROOT(dentry1));
2844         WARN_ON(IS_ROOT(dentry2));
2845
2846         __d_move(dentry1, dentry2, true);
2847
2848         write_sequnlock(&rename_lock);
2849 }
2850
2851 /**
2852  * d_ancestor - search for an ancestor
2853  * @p1: ancestor dentry
2854  * @p2: child dentry
2855  *
2856  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2857  * an ancestor of p2, else NULL.
2858  */
2859 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2860 {
2861         struct dentry *p;
2862
2863         for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2864                 if (p->d_parent == p1)
2865                         return p;
2866         }
2867         return NULL;
2868 }
2869
2870 /*
2871  * This helper attempts to cope with remotely renamed directories
2872  *
2873  * It assumes that the caller is already holding
2874  * dentry->d_parent->d_inode->i_mutex, and rename_lock
2875  *
2876  * Note: If ever the locking in lock_rename() changes, then please
2877  * remember to update this too...
2878  */
2879 static int __d_unalias(struct inode *inode,
2880                 struct dentry *dentry, struct dentry *alias)
2881 {
2882         struct mutex *m1 = NULL;
2883         struct rw_semaphore *m2 = NULL;
2884         int ret = -ESTALE;
2885
2886         /* If alias and dentry share a parent, then no extra locks required */
2887         if (alias->d_parent == dentry->d_parent)
2888                 goto out_unalias;
2889
2890         /* See lock_rename() */
2891         if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2892                 goto out_err;
2893         m1 = &dentry->d_sb->s_vfs_rename_mutex;
2894         if (!inode_trylock_shared(alias->d_parent->d_inode))
2895                 goto out_err;
2896         m2 = &alias->d_parent->d_inode->i_rwsem;
2897 out_unalias:
2898         __d_move(alias, dentry, false);
2899         ret = 0;
2900 out_err:
2901         if (m2)
2902                 up_read(m2);
2903         if (m1)
2904                 mutex_unlock(m1);
2905         return ret;
2906 }
2907
2908 /**
2909  * d_splice_alias - splice a disconnected dentry into the tree if one exists
2910  * @inode:  the inode which may have a disconnected dentry
2911  * @dentry: a negative dentry which we want to point to the inode.
2912  *
2913  * If inode is a directory and has an IS_ROOT alias, then d_move that in
2914  * place of the given dentry and return it, else simply d_add the inode
2915  * to the dentry and return NULL.
2916  *
2917  * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
2918  * we should error out: directories can't have multiple aliases.
2919  *
2920  * This is needed in the lookup routine of any filesystem that is exportable
2921  * (via knfsd) so that we can build dcache paths to directories effectively.
2922  *
2923  * If a dentry was found and moved, then it is returned.  Otherwise NULL
2924  * is returned.  This matches the expected return value of ->lookup.
2925  *
2926  * Cluster filesystems may call this function with a negative, hashed dentry.
2927  * In that case, we know that the inode will be a regular file, and also this
2928  * will only occur during atomic_open. So we need to check for the dentry
2929  * being already hashed only in the final case.
2930  */
2931 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2932 {
2933         if (IS_ERR(inode))
2934                 return ERR_CAST(inode);
2935
2936         BUG_ON(!d_unhashed(dentry));
2937
2938         if (!inode)
2939                 goto out;
2940
2941         security_d_instantiate(dentry, inode);
2942         spin_lock(&inode->i_lock);
2943         if (S_ISDIR(inode->i_mode)) {
2944                 struct dentry *new = __d_find_any_alias(inode);
2945                 if (unlikely(new)) {
2946                         /* The reference to new ensures it remains an alias */
2947                         spin_unlock(&inode->i_lock);
2948                         write_seqlock(&rename_lock);
2949                         if (unlikely(d_ancestor(new, dentry))) {
2950                                 write_sequnlock(&rename_lock);
2951                                 dput(new);
2952                                 new = ERR_PTR(-ELOOP);
2953                                 pr_warn_ratelimited(
2954                                         "VFS: Lookup of '%s' in %s %s"
2955                                         " would have caused loop\n",
2956                                         dentry->d_name.name,
2957                                         inode->i_sb->s_type->name,
2958                                         inode->i_sb->s_id);
2959                         } else if (!IS_ROOT(new)) {
2960                                 struct dentry *old_parent = dget(new->d_parent);
2961                                 int err = __d_unalias(inode, dentry, new);
2962                                 write_sequnlock(&rename_lock);
2963                                 if (err) {
2964                                         dput(new);
2965                                         new = ERR_PTR(err);
2966                                 }
2967                                 dput(old_parent);
2968                         } else {
2969                                 __d_move(new, dentry, false);
2970                                 write_sequnlock(&rename_lock);
2971                         }
2972                         iput(inode);
2973                         return new;
2974                 }
2975         }
2976 out:
2977         __d_add(dentry, inode);
2978         return NULL;
2979 }
2980 EXPORT_SYMBOL(d_splice_alias);
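
/*
 * Illustrative sketch (not part of dcache.c; foo_iget() is a
 * hypothetical helper returning the inode for @name, NULL if the name
 * does not exist, or an ERR_PTR on failure): a typical ->lookup()
 * method for an exportable filesystem simply ends with
 * d_splice_alias(), which copes with all three cases.
 */
extern struct inode *foo_iget(struct inode *dir, const struct qstr *name);

static struct dentry *foo_lookup(struct inode *dir, struct dentry *dentry,
                                 unsigned int flags)
{
        struct inode *inode = foo_iget(dir, &dentry->d_name);

        return d_splice_alias(inode, dentry);
}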
2981
2982 /*
2983  * Test whether new_dentry is a subdirectory of old_dentry.
2984  *
2985  * Trivially implemented using the dcache structure
2986  */
2987
2988 /**
2989  * is_subdir - is new dentry a subdirectory of old_dentry
2990  * @new_dentry: new dentry
2991  * @old_dentry: old dentry
2992  *
2993  * Returns true if new_dentry is a subdirectory of old_dentry (at any depth).
2994  * Returns false otherwise.
2995  * The caller must ensure that "new_dentry" is pinned before calling is_subdir().
2996  */
2997
2998 bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2999 {
3000         bool result;
3001         unsigned seq;
3002
3003         if (new_dentry == old_dentry)
3004                 return true;
3005
3006         do {
3007                 /* restart the check if a rename raced with us (seq retry) */
3008                 seq = read_seqbegin(&rename_lock);
3009                 /*
3010                  * Need rcu_read_lock() to protect against d_parent changing
3011                  * underneath us due to a concurrent d_move()
3012                  */
3013                 rcu_read_lock();
3014                 if (d_ancestor(old_dentry, new_dentry))
3015                         result = true;
3016                 else
3017                         result = false;
3018                 rcu_read_unlock();
3019         } while (read_seqretry(&rename_lock, seq));
3020
3021         return result;
3022 }
3023 EXPORT_SYMBOL(is_subdir);
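/*
 * Editor's illustrative sketch (not part of dcache.c): one typical use of
 * is_subdir() is refusing an operation that would move a directory under
 * its own subtree.  example_move_creates_loop() is a hypothetical name;
 * both dentries must already be pinned by the caller.
 */
static bool example_move_creates_loop(struct dentry *dir,
				      struct dentry *new_parent)
{
	/* true when new_parent is dir itself or any descendant of dir */
	return is_subdir(new_parent, dir);
}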
3024
3025 static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
3026 {
3027         struct dentry *root = data;
3028         if (dentry != root) {
3029                 if (d_unhashed(dentry) || !dentry->d_inode)
3030                         return D_WALK_SKIP;
3031
3032                 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
3033                         dentry->d_flags |= DCACHE_GENOCIDE;
3034                         dentry->d_lockref.count--;
3035                 }
3036         }
3037         return D_WALK_CONTINUE;
3038 }
3039
3040 void d_genocide(struct dentry *parent)
3041 {
3042         d_walk(parent, parent, d_genocide_kill);
3043 }
3044
3045 EXPORT_SYMBOL(d_genocide);
3046
3047 void d_tmpfile(struct dentry *dentry, struct inode *inode)
3048 {
3049         inode_dec_link_count(inode);
3050         BUG_ON(dentry->d_name.name != dentry->d_iname ||
3051                 !hlist_unhashed(&dentry->d_u.d_alias) ||
3052                 !d_unlinked(dentry));
3053         spin_lock(&dentry->d_parent->d_lock);
3054         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
3055         dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
3056                                 (unsigned long long)inode->i_ino);
3057         spin_unlock(&dentry->d_lock);
3058         spin_unlock(&dentry->d_parent->d_lock);
3059         d_instantiate(dentry, inode);
3060 }
3061 EXPORT_SYMBOL(d_tmpfile);
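/*
 * Editor's illustrative sketch (not part of dcache.c): a filesystem's
 * ->tmpfile typically allocates a fresh, unlinked inode and then lets
 * d_tmpfile() name the (still unhashed) dentry after the inode number and
 * instantiate it.  myfs_tmpfile() and myfs_new_inode() are hypothetical;
 * the helper is assumed to return an inode with i_nlink == 1 or an ERR_PTR.
 */
static int myfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode = myfs_new_inode(dir, mode);

	if (IS_ERR(inode))
		return PTR_ERR(inode);

	/* drops the creation link count (no directory entry exists) and
	 * instantiates the dentry */
	d_tmpfile(dentry, inode);
	return 0;
}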
3062
3063 static __initdata unsigned long dhash_entries;
3064 static int __init set_dhash_entries(char *str)
3065 {
3066         if (!str)
3067                 return 0;
3068         dhash_entries = simple_strtoul(str, &str, 0);
3069         return 1;
3070 }
3071 __setup("dhash_entries=", set_dhash_entries);
3072
3073 static void __init dcache_init_early(void)
3074 {
3075         /* If hashes are distributed across NUMA nodes, defer
3076          * hash allocation until vmalloc space is available.
3077          */
3078         if (hashdist)
3079                 return;
3080
3081         dentry_hashtable =
3082                 alloc_large_system_hash("Dentry cache",
3083                                         sizeof(struct hlist_bl_head),
3084                                         dhash_entries,
3085                                         13,
3086                                         HASH_EARLY | HASH_ZERO,
3087                                         &d_hash_shift,
3088                                         NULL,
3089                                         0,
3090                                         0);
3091         d_hash_shift = 32 - d_hash_shift;
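	/*
	 * Editor's note: alloc_large_system_hash() stored log2(#buckets) in
	 * d_hash_shift; keeping 32 - shift instead lets d_hash() pick the
	 * bucket with a single right shift, i.e. from the top bits of the
	 * 32-bit name hash.
	 */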
3092 }
3093
3094 static void __init dcache_init(void)
3095 {
3096         /*
3097          * A constructor could be added for stable state like the lists,
3098          * but it is probably not worth it because of the cache nature
3099          * of the dcache.
3100          */
3101         dentry_cache = KMEM_CACHE_USERCOPY(dentry,
3102                 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT,
3103                 d_iname);
3104
3105         /* Hash may have been set up in dcache_init_early */
3106         if (!hashdist)
3107                 return;
3108
3109         dentry_hashtable =
3110                 alloc_large_system_hash("Dentry cache",
3111                                         sizeof(struct hlist_bl_head),
3112                                         dhash_entries,
3113                                         13,
3114                                         HASH_ZERO,
3115                                         &d_hash_shift,
3116                                         NULL,
3117                                         0,
3118                                         0);
3119         d_hash_shift = 32 - d_hash_shift;
3120 }
3121
3122 /* SLAB cache for __getname() consumers */
3123 struct kmem_cache *names_cachep __read_mostly;
3124 EXPORT_SYMBOL(names_cachep);
3125
3126 void __init vfs_caches_init_early(void)
3127 {
3128         int i;
3129
3130         for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++)
3131                 INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]);
3132
3133         dcache_init_early();
3134         inode_init_early();
3135 }
3136
3137 void __init vfs_caches_init(void)
3138 {
3139         names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0,
3140                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL);
3141
3142         dcache_init();
3143         inode_init();
3144         files_init();
3145         files_maxfiles_init();
3146         mnt_init();
3147         bdev_cache_init();
3148         chrdev_init();
3149 }