vfs: make it possible to access the dentry hash/len as one 64-bit entry
[linux-2.6-block.git] / fs / dcache.c
index b60ddc41d78385d168e67e98adc6020fedc1cfeb..92099f61bc647ca14aef18ac99bbfd852ba152dd 100644 (file)
@@ -141,18 +141,25 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
  * The strings are both count bytes long, and count is non-zero.
  */
-static inline int dentry_cmp(const unsigned char *cs, size_t scount,
-                               const unsigned char *ct, size_t tcount)
-{
 #ifdef CONFIG_DCACHE_WORD_ACCESS
-       unsigned long a,b,mask;
 
-       if (unlikely(scount != tcount))
-               return 1;
+#include <asm/word-at-a-time.h>
+/*
+ * NOTE! 'cs' and 'scount' come from a dentry, so it has a
+ * aligned allocation for this particular component. We don't
+ * strictly need the load_unaligned_zeropad() safety, but it
+ * doesn't hurt either.
+ *
+ * In contrast, 'ct' and 'tcount' can be from a pathname, and do
+ * need the careful unaligned handling.
+ */
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
+       unsigned long a,b,mask;
 
        for (;;) {
                a = *(unsigned long *)cs;
-               b = *(unsigned long *)ct;
+               b = load_unaligned_zeropad(ct);
                if (tcount < sizeof(unsigned long))
                        break;
                if (unlikely(a != b))
@@ -165,10 +172,12 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
        }
        mask = ~(~0ul << tcount*8);
        return unlikely(!!((a ^ b) & mask));
+}
+
 #else
-       if (scount != tcount)
-               return 1;
 
+static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
+{
        do {
                if (*cs != *ct)
                        return 1;
@@ -177,7 +186,29 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
                tcount--;
        } while (tcount);
        return 0;
+}
+
 #endif
+
+static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
+{
+       /*
+        * Be careful about RCU walk racing with rename:
+        * use ACCESS_ONCE to fetch the name pointer.
+        *
+        * NOTE! Even if a rename will mean that the length
+        * was not loaded atomically, we don't care. The
+        * RCU walk will check the sequence count eventually,
+        * and catch it. And we won't overrun the buffer,
+        * because we're reading the name pointer atomically,
+        * and a dentry name is guaranteed to be properly
+        * terminated with a NUL byte.
+        *
+        * End result: even if 'len' is wrong, we'll exit
+        * early because the data cannot match (there can
+        * be no NUL in the ct/tcount data)
+        */
+       return dentry_string_cmp(ACCESS_ONCE(dentry->d_name.name), ct, tcount);
 }
 
 static void __d_free(struct rcu_head *head)
@@ -1421,18 +1452,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
        }
 
        list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-               struct qstr *qstr = &alias->d_name;
-
                /*
                 * Don't need alias->d_lock here, because aliases with
                 * d_parent == entry->d_parent are not subject to name or
                 * parent changes, because the parent inode i_mutex is held.
                 */
-               if (qstr->hash != hash)
+               if (alias->d_name.hash != hash)
                        continue;
                if (alias->d_parent != entry->d_parent)
                        continue;
-               if (dentry_cmp(qstr->name, qstr->len, name, len))
+               if (alias->d_name.len != len)
+                       continue;
+               if (dentry_cmp(alias, name, len))
                        continue;
                __dget(alias);
                return alias;
@@ -1471,7 +1502,7 @@ struct dentry *d_make_root(struct inode *root_inode)
        struct dentry *res = NULL;
 
        if (root_inode) {
-               static const struct qstr name = { .name = "/", .len = 1 };
+               static const struct qstr name = QSTR_INIT("/", 1);
 
                res = __d_alloc(root_inode->i_sb, &name);
                if (res)
@@ -1709,6 +1740,48 @@ err_out:
 }
 EXPORT_SYMBOL(d_add_ci);
 
+/*
+ * Do the slow-case of the dentry name compare.
+ *
+ * Unlike the dentry_cmp() function, we need to atomically
+ * load the name, length and inode information, so that the
+ * filesystem can rely on them, and can use the 'name' and
+ * 'len' information without worrying about walking off the
+ * end of memory etc.
+ *
+ * Thus the read_seqcount_retry() and the "duplicate" info
+ * in arguments (the low-level filesystem should not look
+ * at the dentry inode or name contents directly, since
+ * rename can change them while we're in RCU mode).
+ */
+enum slow_d_compare {
+       D_COMP_OK,
+       D_COMP_NOMATCH,
+       D_COMP_SEQRETRY,
+};
+
+static noinline enum slow_d_compare slow_dentry_cmp(
+               const struct dentry *parent,
+               struct inode *inode,
+               struct dentry *dentry,
+               unsigned int seq,
+               const struct qstr *name)
+{
+       int tlen = dentry->d_name.len;
+       const char *tname = dentry->d_name.name;
+       struct inode *i = dentry->d_inode;
+
+       if (read_seqcount_retry(&dentry->d_seq, seq)) {
+               cpu_relax();
+               return D_COMP_SEQRETRY;
+       }
+       if (parent->d_op->d_compare(parent, inode,
+                               dentry, i,
+                               tlen, tname, name))
+               return D_COMP_NOMATCH;
+       return D_COMP_OK;
+}
+
 /**
  * __d_lookup_rcu - search for a dentry (racy, store-free)
  * @parent: parent dentry
@@ -1735,15 +1808,17 @@ EXPORT_SYMBOL(d_add_ci);
  * the returned dentry, so long as its parent's seqlock is checked after the
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
  * is formed, giving integrity down the path walk.
+ *
+ * NOTE! The caller *has* to check the resulting dentry against the sequence
+ * number we've returned before using any of the resulting dentry state!
  */
 struct dentry *__d_lookup_rcu(const struct dentry *parent,
                                const struct qstr *name,
-                               unsigned *seqp, struct inode **inode)
+                               unsigned *seqp, struct inode *inode)
 {
-       unsigned int len = name->len;
-       unsigned int hash = name->hash;
+       u64 hashlen = name->hash_len;
        const unsigned char *str = name->name;
-       struct hlist_bl_head *b = d_hash(parent, hash);
+       struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
        struct hlist_bl_node *node;
        struct dentry *dentry;
 
@@ -1769,49 +1844,45 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
         */
        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
                unsigned seq;
-               struct inode *i;
-               const char *tname;
-               int tlen;
-
-               if (dentry->d_name.hash != hash)
-                       continue;
 
 seqretry:
-               seq = read_seqcount_begin(&dentry->d_seq);
-               if (dentry->d_parent != parent)
-                       continue;
-               if (d_unhashed(dentry))
-                       continue;
-               tlen = dentry->d_name.len;
-               tname = dentry->d_name.name;
-               i = dentry->d_inode;
-               prefetch(tname);
                /*
-                * This seqcount check is required to ensure name and
-                * len are loaded atomically, so as not to walk off the
-                * edge of memory when walking. If we could load this
-                * atomically some other way, we could drop this check.
+                * The dentry sequence count protects us from concurrent
+                * renames, and thus protects inode, parent and name fields.
+                *
+                * The caller must perform a seqcount check in order
+                * to do anything useful with the returned dentry,
+                * including using the 'd_inode' pointer.
+                *
+                * NOTE! We do a "raw" seqcount_begin here. That means that
+                * we don't wait for the sequence count to stabilize if it
+                * is in the middle of a sequence change. If we do the slow
+                * dentry compare, we will do seqretries until it is stable,
+                * and if we end up with a successful lookup, we actually
+                * want to exit RCU lookup anyway.
                 */
-               if (read_seqcount_retry(&dentry->d_seq, seq))
-                       goto seqretry;
+               seq = raw_seqcount_begin(&dentry->d_seq);
+               if (dentry->d_parent != parent)
+                       continue;
+               *seqp = seq;
+
                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
-                       if (parent->d_op->d_compare(parent, *inode,
-                                               dentry, i,
-                                               tlen, tname, name))
+                       if (dentry->d_name.hash != hashlen_hash(hashlen))
                                continue;
-               } else {
-                       if (dentry_cmp(tname, tlen, str, len))
+                       switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
+                       case D_COMP_OK:
+                               return dentry;
+                       case D_COMP_NOMATCH:
                                continue;
+                       default:
+                               goto seqretry;
+                       }
                }
-               /*
-                * No extra seqcount check is required after the name
-                * compare. The caller must perform a seqcount check in
-                * order to do anything useful with the returned dentry
-                * anyway.
-                */
-               *seqp = seq;
-               *inode = i;
-               return dentry;
+
+               if (dentry->d_name.hash_len != hashlen)
+                       continue;
+               if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
+                       return dentry;
        }
        return NULL;
 }
@@ -1890,8 +1961,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
        rcu_read_lock();
        
        hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
-               const char *tname;
-               int tlen;
 
                if (dentry->d_name.hash != hash)
                        continue;
@@ -1906,15 +1975,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
                 * It is safe to compare names since d_move() cannot
                 * change the qstr (protected by d_lock).
                 */
-               tlen = dentry->d_name.len;
-               tname = dentry->d_name.name;
                if (parent->d_flags & DCACHE_OP_COMPARE) {
+                       int tlen = dentry->d_name.len;
+                       const char *tname = dentry->d_name.name;
                        if (parent->d_op->d_compare(parent, parent->d_inode,
                                                dentry, dentry->d_inode,
                                                tlen, tname, name))
                                goto next;
                } else {
-                       if (dentry_cmp(tname, tlen, str, len))
+                       if (dentry->d_name.len != len)
+                               goto next;
+                       if (dentry_cmp(dentry, str, len))
                                goto next;
                }