vfs: make it possible to access the dentry hash/len as one 64-bit entry
[linux-2.6-block.git] / fs / dcache.c
index b80531c917799475147ba59234c821ea52306fd3..92099f61bc647ca14aef18ac99bbfd852ba152dd 100644 (file)
@@ -153,16 +153,12 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
  * In contrast, 'ct' and 'tcount' can be from a pathname, and do
  * need the careful unaligned handling.
  */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-                               const unsigned char *ct, size_t tcount)
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 {
        unsigned long a,b,mask;
 
-       if (unlikely(scount != tcount))
-               return 1;
-
        for (;;) {
-               a = load_unaligned_zeropad(cs);
+               a = *(unsigned long *)cs;
                b = load_unaligned_zeropad(ct);
                if (tcount < sizeof(unsigned long))
                        break;
@@ -180,12 +176,8 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 
 #else
 
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-                               const unsigned char *ct, size_t tcount)
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
 {
-       if (scount != tcount)
-               return 1;
-
        do {
                if (*cs != *ct)
                        return 1;
@@ -198,6 +190,27 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 
 #endif
 
+static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
+{
+       /*
+        * Be careful about RCU walk racing with rename:
+        * use ACCESS_ONCE to fetch the name pointer.
+        *
+        * NOTE! Even if a rename will mean that the length
+        * was not loaded atomically, we don't care. The
+        * RCU walk will check the sequence count eventually,
+        * and catch it. And we won't overrun the buffer,
+        * because we're reading the name pointer atomically,
+        * and a dentry name is guaranteed to be properly
+        * terminated with a NUL byte.
+        *
+        * End result: even if 'len' is wrong, we'll exit
+        * early because the data cannot match (there can
+        * be no NUL in the ct/tcount data)
+        */
+       return dentry_string_cmp(ACCESS_ONCE(dentry->d_name.name), ct, tcount);
+}
+
 static void __d_free(struct rcu_head *head)
 {
        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
@@ -1439,18 +1452,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
        }
 
        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-               struct qstr *qstr = &alias->d_name;
-
                /*
                 * Don't need alias->d_lock here, because aliases with
                 * d_parent == entry->d_parent are not subject to name or
                 * parent changes, because the parent inode i_mutex is held.
                 */
-               if (qstr->hash != hash)
+               if (alias->d_name.hash != hash)
                        continue;
                if (alias->d_parent != entry->d_parent)
                        continue;
-               if (dentry_cmp(qstr->name, qstr->len, name, len))
+               if (alias->d_name.len != len)
+                       continue;
+               if (dentry_cmp(alias, name, len))
                        continue;
                __dget(alias);
                return alias;
@@ -1489,7 +1502,7 @@ struct dentry *d_make_root(struct inode *root_inode)
        struct dentry *res = NULL;
 
        if (root_inode) {
-               static const struct qstr name = { .name = "/", .len = 1 };
+               static const struct qstr name = QSTR_INIT("/", 1);
 
                res = __d_alloc(root_inode->i_sb, &name);
                if (res)
@@ -1727,6 +1740,48 @@ err_out:
 }
 EXPORT_SYMBOL(d_add_ci);
 
+/*
+ * Do the slow-case of the dentry name compare.
+ *
+ * Unlike the dentry_cmp() function, we need to atomically
+ * load the name, length and inode information, so that the
+ * filesystem can rely on them, and can use the 'name' and
+ * 'len' information without worrying about walking off the
+ * end of memory etc.
+ *
+ * Thus the read_seqcount_retry() and the "duplicate" info
+ * in arguments (the low-level filesystem should not look
+ * at the dentry inode or name contents directly, since
+ * rename can change them while we're in RCU mode).
+ */
+enum slow_d_compare {
+       D_COMP_OK,
+       D_COMP_NOMATCH,
+       D_COMP_SEQRETRY,
+};
+
+static noinline enum slow_d_compare slow_dentry_cmp(
+               const struct dentry *parent,
+               struct inode *inode,
+               struct dentry *dentry,
+               unsigned int seq,
+               const struct qstr *name)
+{
+       int tlen = dentry->d_name.len;
+       const char *tname = dentry->d_name.name;
+       struct inode *i = dentry->d_inode;
+
+       if (read_seqcount_retry(&dentry->d_seq, seq)) {
+               cpu_relax();
+               return D_COMP_SEQRETRY;
+       }
+       if (parent->d_op->d_compare(parent, inode,
+                               dentry, i,
+                               tlen, tname, name))
+               return D_COMP_NOMATCH;
+       return D_COMP_OK;
+}
+
 /**
  * __d_lookup_rcu - search for a dentry (racy, store-free)
  * @parent: parent dentry
@@ -1753,15 +1808,17 @@ EXPORT_SYMBOL(d_add_ci);
  * the returned dentry, so long as its parent's seqlock is checked after the
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
+ *
+ * NOTE! The caller *has* to check the resulting dentry against the sequence
+ * number we've returned before using any of the resulting dentry state!
  */
 struct dentry *__d_lookup_rcu(const struct dentry *parent,
                                const struct qstr *name,
-                               unsigned *seqp, struct inode **inode)
+                               unsigned *seqp, struct inode *inode)
 {
-       unsigned int len = name->len;
-       unsigned int hash = name->hash;
+       u64 hashlen = name->hash_len;
        const unsigned char *str = name->name;
-       struct hlist_bl_head *b = d_hash(parent, hash);
+       struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
        struct hlist_bl_node *node;
        struct dentry *dentry;
 
@@ -1787,49 +1844,45 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
         */
        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
                unsigned seq;
-               struct inode *i;
-               const char *tname;
-               int tlen;
-
-               if (dentry->d_name.hash != hash)
-                       continue;
 
 seqretry:
-               seq = read_seqcount_begin(&dentry->d_seq);
-               if (dentry->d_parent != parent)
-                       continue;
-               if (d_unhashed(dentry))
-                       continue;
-               tlen = dentry->d_name.len;
-               tname = dentry->d_name.name;
-               i = dentry->d_inode;
-               prefetch(tname);
                /*
-                * This seqcount check is required to ensure name and
-                * len are loaded atomically, so as not to walk off the
-                * edge of memory when walking. If we could load this
-                * atomically some other way, we could drop this check.
+                * The dentry sequence count protects us from concurrent
+                * renames, and thus protects inode, parent and name fields.
+                *
+                * The caller must perform a seqcount check in order
+                * to do anything useful with the returned dentry,
+                * including using the 'd_inode' pointer.
+                *
+                * NOTE! We do a "raw" seqcount_begin here. That means that
+                * we don't wait for the sequence count to stabilize if it
+                * is in the middle of a sequence change. If we do the slow
+                * dentry compare, we will do seqretries until it is stable,
+                * and if we end up with a successful lookup, we actually
+                * want to exit RCU lookup anyway.
                 */
-               if (read_seqcount_retry(&dentry->d_seq, seq))
-                       goto seqretry;
+               seq = raw_seqcount_begin(&dentry->d_seq);
+               if (dentry->d_parent != parent)
+                       continue;
+               *seqp = seq;
+
                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
-                       if (parent->d_op->d_compare(parent, *inode,
-                                               dentry, i,
-                                               tlen, tname, name))
+                       if (dentry->d_name.hash != hashlen_hash(hashlen))
                                continue;
-               } else {
-                       if (dentry_cmp(tname, tlen, str, len))
+                       switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+                       case D_COMP_OK:
+                               return dentry;
+                       case D_COMP_NOMATCH:
                                continue;
+                       default:
+                               goto seqretry;
+                       }
                }
-               /*
-                * No extra seqcount check is required after the name
-                * compare. The caller must perform a seqcount check in
-                * order to do anything useful with the returned dentry
-                * anyway.
-                */
-               *seqp = seq;
-               *inode = i;
-               return dentry;
+
+               if (dentry->d_name.hash_len != hashlen)
+                       continue;
+               if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
+                       return dentry;
        }
        return NULL;
 }
@@ -1908,8 +1961,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
        rcu_read_lock();
        
        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
-               const char *tname;
-               int tlen;
 
                if (dentry->d_name.hash != hash)
                        continue;
@@ -1924,15 +1975,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
                 */
-               tlen = dentry->d_name.len;
-               tname = dentry->d_name.name;
                if (parent->d_flags & DCACHE_OP_COMPARE) {
+                       int tlen = dentry->d_name.len;
+                       const char *tname = dentry->d_name.name;
                        if (parent->d_op->d_compare(parent, parent->d_inode,
                                                dentry, dentry->d_inode,
                                                tlen, tname, name))
                                goto next;
                } else {
-                       if (dentry_cmp(tname, tlen, str, len))
+                       if (dentry->d_name.len != len)
+                               goto next;
+                       if (dentry_cmp(dentry, str, len))
                                goto next;
                }