fs/ocfs2/dcache.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * dcache.c
   4  *
   5  * dentry cache handling code
   6  *
   7  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   8  */
   9
  10 #include <linux/fs.h>
  11 #include <linux/types.h>
  12 #include <linux/slab.h>
  13 #include <linux/namei.h>
  14
  15 #include <cluster/masklog.h>
  16
  17 #include "ocfs2.h"
  18
  19 #include "alloc.h"
  20 #include "dcache.h"
  21 #include "dlmglue.h"
  22 #include "file.h"
  23 #include "inode.h"
  24 #include "ocfs2_trace.h"
  25
  26 void ocfs2_dentry_attach_gen(struct dentry *dentry)
  27 {
  28         unsigned long gen =
  29                 OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
  30         BUG_ON(d_inode(dentry));
  31         dentry->d_fsdata = (void *)gen;
  32 }
  33
  34
  35 static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
  36 {
  37         struct inode *inode;
  38         int ret = 0;    /* if all else fails, just return false */
  39         struct ocfs2_super *osb;
  40
  41         if (flags & LOOKUP_RCU)
  42                 return -ECHILD;
  43
  44         inode = d_inode(dentry);
  45         osb = OCFS2_SB(dentry->d_sb);
  46
  47         trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
  48                                       dentry->d_name.name);
  49
  50         /* For a negative dentry -
  51          * check the generation number of the parent and compare with the
  52          * one stored in the inode.
  53          */
  54         if (inode == NULL) {
  55                 unsigned long gen = (unsigned long) dentry->d_fsdata;
  56                 unsigned long pgen;
  57                 spin_lock(&dentry->d_lock);
  58                 pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
  59                 spin_unlock(&dentry->d_lock);
  60                 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
  61                                                        dentry->d_name.name,
  62                                                        pgen, gen);
  63                 if (gen != pgen)
  64                         goto bail;
  65                 goto valid;
  66         }
  67
  68         BUG_ON(!osb);
  69
  70         if (inode == osb->root_inode || is_bad_inode(inode))
  71                 goto bail;
  72
  73         spin_lock(&OCFS2_I(inode)->ip_lock);
  74         /* did we or someone else delete this inode? */
  75         if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
  76                 spin_unlock(&OCFS2_I(inode)->ip_lock);
  77                 trace_ocfs2_dentry_revalidate_delete(
  78                                 (unsigned long long)OCFS2_I(inode)->ip_blkno);
  79                 goto bail;
  80         }
  81         spin_unlock(&OCFS2_I(inode)->ip_lock);
  82
  83         /*
  84          * We don't need a cluster lock to test this because once an
  85          * inode nlink hits zero, it never goes back.
  86          */
  87         if (inode->i_nlink == 0) {
  88                 trace_ocfs2_dentry_revalidate_orphaned(
  89                         (unsigned long long)OCFS2_I(inode)->ip_blkno,
  90                         S_ISDIR(inode->i_mode));
  91                 goto bail;
  92         }
  93
  94         /*
  95          * If the last lookup failed to create dentry lock, let us
  96          * redo it.
  97          */
  98         if (!dentry->d_fsdata) {
  99                 trace_ocfs2_dentry_revalidate_nofsdata(
 100                                 (unsigned long long)OCFS2_I(inode)->ip_blkno);
 101                 goto bail;
 102         }
 103
 104 valid:
 105         ret = 1;
 106
 107 bail:
 108         trace_ocfs2_dentry_revalidate_ret(ret);
 109         return ret;
 110 }
 111
 112 static int ocfs2_match_dentry(struct dentry *dentry,
 113                               u64 parent_blkno,
 114                               int skip_unhashed)
 115 {
 116         struct inode *parent;
 117
 118         /*
 119          * ocfs2_lookup() does a d_splice_alias() _before_ attaching
 120          * to the lock data, so we skip those here, otherwise
 121          * ocfs2_dentry_attach_lock() will get its original dentry
 122          * back.
 123          */
 124         if (!dentry->d_fsdata)
 125                 return 0;
 126
 127         if (skip_unhashed && d_unhashed(dentry))
 128                 return 0;
 129
 130         parent = d_inode(dentry->d_parent);
 131         /* Name is in a different directory. */
 132         if (OCFS2_I(parent)->ip_blkno != parent_blkno)
 133                 return 0;
 134
 135         return 1;
 136 }
 137
 138 /*
 139  * Walk the inode alias list, and find a dentry which has a given
 140  * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
 141  * is looking for a dentry_lock reference. The downconvert thread is
 142  * looking to unhash aliases, so we allow it to skip any that already
 143  * have that property.
 144  */
 145 struct dentry *ocfs2_find_local_alias(struct inode *inode,
 146                                       u64 parent_blkno,
 147                                       int skip_unhashed)
 148 {
 149         struct dentry *dentry;
 150
 151         spin_lock(&inode->i_lock);
 152         hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 153                 spin_lock(&dentry->d_lock);
 154                 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 155                         trace_ocfs2_find_local_alias(dentry->d_name.len,
 156                                                      dentry->d_name.name);
 157
 158                         dget_dlock(dentry);
 159                         spin_unlock(&dentry->d_lock);
 160                         spin_unlock(&inode->i_lock);
 161                         return dentry;
 162                 }
 163                 spin_unlock(&dentry->d_lock);
 164         }
 165         spin_unlock(&inode->i_lock);
 166         return NULL;
 167 }
 168
/*
 * Held in this file while dentry->d_fsdata is pointed at (or cleared
 * from) an ocfs2_dentry_lock and while that lock's dl_count is
 * incremented or decremented.
 */
DEFINE_SPINLOCK(dentry_attach_lock);
 170
 171 /*
 172  * Attach this dentry to a cluster lock.
 173  *
 174  * Dentry locks cover all links in a given directory to a particular
 175  * inode. We do this so that ocfs2 can build a lock name which all
 176  * nodes in the cluster can agree on at all times. Shoving full names
 177  * in the cluster lock won't work due to size restrictions. Covering
 178  * links inside of a directory is a good compromise because it still
 179  * allows us to use the parent directory lock to synchronize
 180  * operations.
 181  *
 182  * Call this function with the parent dir semaphore and the parent dir
 183  * cluster lock held.
 184  *
 185  * The dir semaphore will protect us from having to worry about
 186  * concurrent processes on our node trying to attach a lock at the
 187  * same time.
 188  *
 189  * The dir cluster lock (held at either PR or EX mode) protects us
 190  * from unlink and rename on other nodes.
 191  *
 192  * A dput() can happen asynchronously due to pruning, so we cover
 193  * attaching and detaching the dentry lock with a
 194  * dentry_attach_lock.
 195  *
 196  * A node which has done lookup on a name retains a protected read
 197  * lock until final dput. If the user requests and unlink or rename,
 198  * the protected read is upgraded to an exclusive lock. Other nodes
 199  * who have seen the dentry will then be informed that they need to
 200  * downgrade their lock, which will involve d_delete on the
 201  * dentry. This happens in ocfs2_dentry_convert_worker().
 202  */
 203 int ocfs2_dentry_attach_lock(struct dentry *dentry,
 204                              struct inode *inode,
 205                              u64 parent_blkno)
 206 {
 207         int ret;
 208         struct dentry *alias;
 209         struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
 210
 211         trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name,
 212                                        (unsigned long long)parent_blkno, dl);
 213
 214         /*
 215          * Negative dentry. We ignore these for now.
 216          *
 217          * XXX: Could we can improve ocfs2_dentry_revalidate() by
 218          * tracking these?
 219          */
 220         if (!inode)
 221                 return 0;
 222
 223         if (d_really_is_negative(dentry) && dentry->d_fsdata) {
 224                 /* Converting a negative dentry to positive
 225                    Clear dentry->d_fsdata */
 226                 dentry->d_fsdata = dl = NULL;
 227         }
 228
 229         if (dl) {
 230                 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
 231                                 " \"%pd\": old parent: %llu, new: %llu\n",
 232                                 dentry,
 233                                 (unsigned long long)parent_blkno,
 234                                 (unsigned long long)dl->dl_parent_blkno);
 235                 return 0;
 236         }
 237
 238         alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
 239         if (alias) {
 240                 /*
 241                  * Great, an alias exists, which means we must have a
 242                  * dentry lock already. We can just grab the lock off
 243                  * the alias and add it to the list.
 244                  *
 245                  * We're depending here on the fact that this dentry
 246                  * was found and exists in the dcache and so must have
 247                  * a reference to the dentry_lock because we can't
 248                  * race creates. Final dput() cannot happen on it
 249                  * since we have it pinned, so our reference is safe.
 250                  */
 251                 dl = alias->d_fsdata;
 252                 mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
 253                                 (unsigned long long)parent_blkno,
 254                                 (unsigned long long)OCFS2_I(inode)->ip_blkno);
 255
 256                 mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
 257                                 " \"%pd\": old parent: %llu, new: %llu\n",
 258                                 dentry,
 259                                 (unsigned long long)parent_blkno,
 260                                 (unsigned long long)dl->dl_parent_blkno);
 261
 262                 trace_ocfs2_dentry_attach_lock_found(dl->dl_lockres.l_name,
 263                                 (unsigned long long)parent_blkno,
 264                                 (unsigned long long)OCFS2_I(inode)->ip_blkno);
 265
 266                 goto out_attach;
 267         }
 268
 269         /*
 270          * There are no other aliases
 271          */
 272         dl = kmalloc(sizeof(*dl), GFP_NOFS);
 273         if (!dl) {
 274                 ret = -ENOMEM;
 275                 mlog_errno(ret);
 276                 return ret;
 277         }
 278
 279         dl->dl_count = 0;
 280         /*
 281          * Does this have to happen below, for all attaches, in case
 282          * the struct inode gets blown away by the downconvert thread?
 283          */
 284         dl->dl_inode = igrab(inode);
 285         dl->dl_parent_blkno = parent_blkno;
 286         ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
 287
 288 out_attach:
 289         spin_lock(&dentry_attach_lock);
 290         if (unlikely(dentry->d_fsdata && !alias)) {
 291                 /* d_fsdata is set by a racing thread which is doing
 292                  * the same thing as this thread is doing. Leave the racing
 293                  * thread going ahead and we return here.
 294                  */
 295                 spin_unlock(&dentry_attach_lock);
 296                 iput(dl->dl_inode);
 297                 ocfs2_lock_res_free(&dl->dl_lockres);
 298                 kfree(dl);
 299                 return 0;
 300         }
 301
 302         dentry->d_fsdata = dl;
 303         dl->dl_count++;
 304         spin_unlock(&dentry_attach_lock);
 305
 306         /*
 307          * This actually gets us our PRMODE level lock. From now on,
 308          * we'll have a notification if one of these names is
 309          * destroyed on another node.
 310          */
 311         ret = ocfs2_dentry_lock(dentry, 0);
 312         if (!ret)
 313                 ocfs2_dentry_unlock(dentry, 0);
 314         else
 315                 mlog_errno(ret);
 316
 317         /*
 318          * In case of error, manually free the allocation and do the iput().
 319          * We need to do this because error here means no d_instantiate(),
 320          * which means iput() will not be called during dput(dentry).
 321          */
 322         if (ret < 0 && !alias) {
 323                 ocfs2_lock_res_free(&dl->dl_lockres);
 324                 BUG_ON(dl->dl_count != 1);
 325                 spin_lock(&dentry_attach_lock);
 326                 dentry->d_fsdata = NULL;
 327                 spin_unlock(&dentry_attach_lock);
 328                 kfree(dl);
 329                 iput(inode);
 330         }
 331
 332         dput(alias);
 333
 334         return ret;
 335 }
 336
 337 /*
 338  * ocfs2_dentry_iput() and friends.
 339  *
 340  * At this point, our particular dentry is detached from the inodes
 341  * alias list, so there's no way that the locking code can find it.
 342  *
 343  * The interesting stuff happens when we determine that our lock needs
 344  * to go away because this is the last subdir alias in the
 345  * system. This function needs to handle a couple things:
 346  *
 347  * 1) Synchronizing lock shutdown with the downconvert threads. This
 348  *    is already handled for us via the lockres release drop function
 349  *    called in ocfs2_release_dentry_lock()
 350  *
 351  * 2) A race may occur when we're doing our lock shutdown and
 352  *    another process wants to create a new dentry lock. Right now we
 353  *    let them race, which means that for a very short while, this
 354  *    node might have two locks on a lock resource. This should be a
 355  *    problem though because one of them is in the process of being
 356  *    thrown out.
 357  */
 358 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
 359                                    struct ocfs2_dentry_lock *dl)
 360 {
 361         iput(dl->dl_inode);
 362         ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
 363         ocfs2_lock_res_free(&dl->dl_lockres);
 364         kfree(dl);
 365 }
 366
 367 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 368                            struct ocfs2_dentry_lock *dl)
 369 {
 370         int unlock = 0;
 371
 372         BUG_ON(dl->dl_count == 0);
 373
 374         spin_lock(&dentry_attach_lock);
 375         dl->dl_count--;
 376         unlock = !dl->dl_count;
 377         spin_unlock(&dentry_attach_lock);
 378
 379         if (unlock)
 380                 ocfs2_drop_dentry_lock(osb, dl);
 381 }
 382
 383 static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
 384 {
 385         struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
 386
 387         if (!dl) {
 388                 /*
 389                  * No dentry lock is ok if we're disconnected or
 390                  * unhashed.
 391                  */
 392                 if (!(dentry->d_flags & DCACHE_DISCONNECTED) &&
 393                     !d_unhashed(dentry)) {
 394                         unsigned long long ino = 0ULL;
 395                         if (inode)
 396                                 ino = (unsigned long long)OCFS2_I(inode)->ip_blkno;
 397                         mlog(ML_ERROR, "Dentry is missing cluster lock. "
 398                              "inode: %llu, d_flags: 0x%x, d_name: %pd\n",
 399                              ino, dentry->d_flags, dentry);
 400                 }
 401
 402                 goto out;
 403         }
 404
 405         mlog_bug_on_msg(dl->dl_count == 0, "dentry: %pd, count: %u\n",
 406                         dentry, dl->dl_count);
 407
 408         ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
 409
 410 out:
 411         iput(inode);
 412 }
 413
 414 /*
 415  * d_move(), but keep the locks in sync.
 416  *
 417  * When we are done, "dentry" will have the parent dir and name of
 418  * "target", which will be thrown away.
 419  *
 420  * We manually update the lock of "dentry" if need be.
 421  *
 422  * "target" doesn't have it's dentry lock touched - we allow the later
 423  * dput() to handle this for us.
 424  *
 425  * This is called during ocfs2_rename(), while holding parent
 426  * directory locks. The dentries have already been deleted on other
 427  * nodes via ocfs2_remote_dentry_delete().
 428  *
 429  * Normally, the VFS handles the d_move() for the file system, after
 430  * the ->rename() callback. OCFS2 wants to handle this internally, so
 431  * the new lock can be created atomically with respect to the cluster.
 432  */
 433 void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
 434                        struct inode *old_dir, struct inode *new_dir)
 435 {
 436         int ret;
 437         struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
 438         struct inode *inode = d_inode(dentry);
 439
 440         /*
 441          * Move within the same directory, so the actual lock info won't
 442          * change.
 443          *
 444          * XXX: Is there any advantage to dropping the lock here?
 445          */
 446         if (old_dir == new_dir)
 447                 goto out_move;
 448
 449         ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
 450
 451         dentry->d_fsdata = NULL;
 452         ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
 453         if (ret)
 454                 mlog_errno(ret);
 455
 456 out_move:
 457         d_move(dentry, target);
 458 }
 459
 460 const struct dentry_operations ocfs2_dentry_ops = {
 461         .d_revalidate           = ocfs2_dentry_revalidate,
 462         .d_iput                 = ocfs2_dentry_iput,
 463 };