fs/xfs/xfs_refcount_item.c

   1 /*
   2  * Copyright (C) 2016 Oracle.  All Rights Reserved.
   3  *
   4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
   5  *
   6  * This program is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU General Public License
   8  * as published by the Free Software Foundation; either version 2
   9  * of the License, or (at your option) any later version.
  10  *
  11  * This program is distributed in the hope that it would be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with this program; if not, write the Free Software Foundation,
  18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
  19  */
  20 #include "xfs.h"
  21 #include "xfs_fs.h"
  22 #include "xfs_format.h"
  23 #include "xfs_log_format.h"
  24 #include "xfs_trans_resv.h"
  25 #include "xfs_bit.h"
  26 #include "xfs_shared.h"
  27 #include "xfs_mount.h"
  28 #include "xfs_defer.h"
  29 #include "xfs_trans.h"
  30 #include "xfs_trans_priv.h"
  31 #include "xfs_buf_item.h"
  32 #include "xfs_refcount_item.h"
  33 #include "xfs_log.h"
  34 #include "xfs_refcount.h"
  35
  36
  37 kmem_zone_t     *xfs_cui_zone;
  38 kmem_zone_t     *xfs_cud_zone;
  39
  40 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
  41 {
  42         return container_of(lip, struct xfs_cui_log_item, cui_item);
  43 }
  44
  45 void
  46 xfs_cui_item_free(
  47         struct xfs_cui_log_item *cuip)
  48 {
  49         if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
  50                 kmem_free(cuip);
  51         else
  52                 kmem_zone_free(xfs_cui_zone, cuip);
  53 }
  54
  55 /*
  56  * Freeing the CUI requires that we remove it from the AIL if it has already
  57  * been placed there. However, the CUI may not yet have been placed in the AIL
  58  * when called by xfs_cui_release() from CUD processing due to the ordering of
  59  * committed vs unpin operations in bulk insert operations. Hence the reference
  60  * count to ensure only the last caller frees the CUI.
  61  */
  62 void
  63 xfs_cui_release(
  64         struct xfs_cui_log_item *cuip)
  65 {
  66         ASSERT(atomic_read(&cuip->cui_refcount) > 0);
  67         if (atomic_dec_and_test(&cuip->cui_refcount)) {
  68                 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
  69                 xfs_cui_item_free(cuip);
  70         }
  71 }
  72
  73
  74 STATIC void
  75 xfs_cui_item_size(
  76         struct xfs_log_item     *lip,
  77         int                     *nvecs,
  78         int                     *nbytes)
  79 {
  80         struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
  81
  82         *nvecs += 1;
  83         *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
  84 }
  85
  86 /*
  87  * This is called to fill in the vector of log iovecs for the
  88  * given cui log item. We use only 1 iovec, and we point that
  89  * at the cui_log_format structure embedded in the cui item.
  90  * It is at this point that we assert that all of the extent
  91  * slots in the cui item have been filled.
  92  */
  93 STATIC void
  94 xfs_cui_item_format(
  95         struct xfs_log_item     *lip,
  96         struct xfs_log_vec      *lv)
  97 {
  98         struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
  99         struct xfs_log_iovec    *vecp = NULL;
 100
 101         ASSERT(atomic_read(&cuip->cui_next_extent) ==
 102                         cuip->cui_format.cui_nextents);
 103
 104         cuip->cui_format.cui_type = XFS_LI_CUI;
 105         cuip->cui_format.cui_size = 1;
 106
 107         xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
 108                         xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
 109 }
 110
 111 /*
 112  * Pinning has no meaning for an cui item, so just return.
 113  */
 114 STATIC void
 115 xfs_cui_item_pin(
 116         struct xfs_log_item     *lip)
 117 {
 118 }
 119
 120 /*
 121  * The unpin operation is the last place an CUI is manipulated in the log. It is
 122  * either inserted in the AIL or aborted in the event of a log I/O error. In
 123  * either case, the CUI transaction has been successfully committed to make it
 124  * this far. Therefore, we expect whoever committed the CUI to either construct
 125  * and commit the CUD or drop the CUD's reference in the event of error. Simply
 126  * drop the log's CUI reference now that the log is done with it.
 127  */
 128 STATIC void
 129 xfs_cui_item_unpin(
 130         struct xfs_log_item     *lip,
 131         int                     remove)
 132 {
 133         struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
 134
 135         xfs_cui_release(cuip);
 136 }
 137
 138 /*
 139  * CUI items have no locking or pushing.  However, since CUIs are pulled from
 140  * the AIL when their corresponding CUDs are committed to disk, their situation
 141  * is very similar to being pinned.  Return XFS_ITEM_PINNED so that the caller
 142  * will eventually flush the log.  This should help in getting the CUI out of
 143  * the AIL.
 144  */
 145 STATIC uint
 146 xfs_cui_item_push(
 147         struct xfs_log_item     *lip,
 148         struct list_head        *buffer_list)
 149 {
 150         return XFS_ITEM_PINNED;
 151 }
 152
 153 /*
 154  * The CUI has been either committed or aborted if the transaction has been
 155  * cancelled. If the transaction was cancelled, an CUD isn't going to be
 156  * constructed and thus we free the CUI here directly.
 157  */
 158 STATIC void
 159 xfs_cui_item_unlock(
 160         struct xfs_log_item     *lip)
 161 {
 162         if (lip->li_flags & XFS_LI_ABORTED)
 163                 xfs_cui_release(CUI_ITEM(lip));
 164 }
 165
 166 /*
 167  * The CUI is logged only once and cannot be moved in the log, so simply return
 168  * the lsn at which it's been logged.
 169  */
 170 STATIC xfs_lsn_t
 171 xfs_cui_item_committed(
 172         struct xfs_log_item     *lip,
 173         xfs_lsn_t               lsn)
 174 {
 175         return lsn;
 176 }
 177
 178 /*
 179  * The CUI dependency tracking op doesn't do squat.  It can't because
 180  * it doesn't know where the free extent is coming from.  The dependency
 181  * tracking has to be handled by the "enclosing" metadata object.  For
 182  * example, for inodes, the inode is locked throughout the extent freeing
 183  * so the dependency should be recorded there.
 184  */
 185 STATIC void
 186 xfs_cui_item_committing(
 187         struct xfs_log_item     *lip,
 188         xfs_lsn_t               lsn)
 189 {
 190 }
 191
 192 /*
 193  * This is the ops vector shared by all cui log items.
 194  */
 195 static const struct xfs_item_ops xfs_cui_item_ops = {
 196         .iop_size       = xfs_cui_item_size,
 197         .iop_format     = xfs_cui_item_format,
 198         .iop_pin        = xfs_cui_item_pin,
 199         .iop_unpin      = xfs_cui_item_unpin,
 200         .iop_unlock     = xfs_cui_item_unlock,
 201         .iop_committed  = xfs_cui_item_committed,
 202         .iop_push       = xfs_cui_item_push,
 203         .iop_committing = xfs_cui_item_committing,
 204 };
 205
 206 /*
 207  * Allocate and initialize an cui item with the given number of extents.
 208  */
 209 struct xfs_cui_log_item *
 210 xfs_cui_init(
 211         struct xfs_mount                *mp,
 212         uint                            nextents)
 213
 214 {
 215         struct xfs_cui_log_item         *cuip;
 216
 217         ASSERT(nextents > 0);
 218         if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
 219                 cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
 220                                 KM_SLEEP);
 221         else
 222                 cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);
 223
 224         xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
 225         cuip->cui_format.cui_nextents = nextents;
 226         cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
 227         atomic_set(&cuip->cui_next_extent, 0);
 228         atomic_set(&cuip->cui_refcount, 2);
 229
 230         return cuip;
 231 }
 232
 233 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
 234 {
 235         return container_of(lip, struct xfs_cud_log_item, cud_item);
 236 }
 237
 238 STATIC void
 239 xfs_cud_item_size(
 240         struct xfs_log_item     *lip,
 241         int                     *nvecs,
 242         int                     *nbytes)
 243 {
 244         *nvecs += 1;
 245         *nbytes += sizeof(struct xfs_cud_log_format);
 246 }
 247
 248 /*
 249  * This is called to fill in the vector of log iovecs for the
 250  * given cud log item. We use only 1 iovec, and we point that
 251  * at the cud_log_format structure embedded in the cud item.
 252  * It is at this point that we assert that all of the extent
 253  * slots in the cud item have been filled.
 254  */
 255 STATIC void
 256 xfs_cud_item_format(
 257         struct xfs_log_item     *lip,
 258         struct xfs_log_vec      *lv)
 259 {
 260         struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
 261         struct xfs_log_iovec    *vecp = NULL;
 262
 263         cudp->cud_format.cud_type = XFS_LI_CUD;
 264         cudp->cud_format.cud_size = 1;
 265
 266         xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
 267                         sizeof(struct xfs_cud_log_format));
 268 }
 269
 270 /*
 271  * Pinning has no meaning for an cud item, so just return.
 272  */
 273 STATIC void
 274 xfs_cud_item_pin(
 275         struct xfs_log_item     *lip)
 276 {
 277 }
 278
 279 /*
 280  * Since pinning has no meaning for an cud item, unpinning does
 281  * not either.
 282  */
 283 STATIC void
 284 xfs_cud_item_unpin(
 285         struct xfs_log_item     *lip,
 286         int                     remove)
 287 {
 288 }
 289
 290 /*
 291  * There isn't much you can do to push on an cud item.  It is simply stuck
 292  * waiting for the log to be flushed to disk.
 293  */
 294 STATIC uint
 295 xfs_cud_item_push(
 296         struct xfs_log_item     *lip,
 297         struct list_head        *buffer_list)
 298 {
 299         return XFS_ITEM_PINNED;
 300 }
 301
 302 /*
 303  * The CUD is either committed or aborted if the transaction is cancelled. If
 304  * the transaction is cancelled, drop our reference to the CUI and free the
 305  * CUD.
 306  */
 307 STATIC void
 308 xfs_cud_item_unlock(
 309         struct xfs_log_item     *lip)
 310 {
 311         struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
 312
 313         if (lip->li_flags & XFS_LI_ABORTED) {
 314                 xfs_cui_release(cudp->cud_cuip);
 315                 kmem_zone_free(xfs_cud_zone, cudp);
 316         }
 317 }
 318
 319 /*
 320  * When the cud item is committed to disk, all we need to do is delete our
 321  * reference to our partner cui item and then free ourselves. Since we're
 322  * freeing ourselves we must return -1 to keep the transaction code from
 323  * further referencing this item.
 324  */
 325 STATIC xfs_lsn_t
 326 xfs_cud_item_committed(
 327         struct xfs_log_item     *lip,
 328         xfs_lsn_t               lsn)
 329 {
 330         struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
 331
 332         /*
 333          * Drop the CUI reference regardless of whether the CUD has been
 334          * aborted. Once the CUD transaction is constructed, it is the sole
 335          * responsibility of the CUD to release the CUI (even if the CUI is
 336          * aborted due to log I/O error).
 337          */
 338         xfs_cui_release(cudp->cud_cuip);
 339         kmem_zone_free(xfs_cud_zone, cudp);
 340
 341         return (xfs_lsn_t)-1;
 342 }
 343
 344 /*
 345  * The CUD dependency tracking op doesn't do squat.  It can't because
 346  * it doesn't know where the free extent is coming from.  The dependency
 347  * tracking has to be handled by the "enclosing" metadata object.  For
 348  * example, for inodes, the inode is locked throughout the extent freeing
 349  * so the dependency should be recorded there.
 350  */
 351 STATIC void
 352 xfs_cud_item_committing(
 353         struct xfs_log_item     *lip,
 354         xfs_lsn_t               lsn)
 355 {
 356 }
 357
 358 /*
 359  * This is the ops vector shared by all cud log items.
 360  */
 361 static const struct xfs_item_ops xfs_cud_item_ops = {
 362         .iop_size       = xfs_cud_item_size,
 363         .iop_format     = xfs_cud_item_format,
 364         .iop_pin        = xfs_cud_item_pin,
 365         .iop_unpin      = xfs_cud_item_unpin,
 366         .iop_unlock     = xfs_cud_item_unlock,
 367         .iop_committed  = xfs_cud_item_committed,
 368         .iop_push       = xfs_cud_item_push,
 369         .iop_committing = xfs_cud_item_committing,
 370 };
 371
 372 /*
 373  * Allocate and initialize an cud item with the given number of extents.
 374  */
 375 struct xfs_cud_log_item *
 376 xfs_cud_init(
 377         struct xfs_mount                *mp,
 378         struct xfs_cui_log_item         *cuip)
 379
 380 {
 381         struct xfs_cud_log_item *cudp;
 382
 383         cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
 384         xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
 385         cudp->cud_cuip = cuip;
 386         cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
 387
 388         return cudp;
 389 }
 390
 391 /*
 392  * Process a refcount update intent item that was recovered from the log.
 393  * We need to update the refcountbt.
 394  */
 395 int
 396 xfs_cui_recover(
 397         struct xfs_mount                *mp,
 398         struct xfs_cui_log_item         *cuip,
 399         struct xfs_defer_ops            *dfops)
 400 {
 401         int                             i;
 402         int                             error = 0;
 403         unsigned int                    refc_type;
 404         struct xfs_phys_extent          *refc;
 405         xfs_fsblock_t                   startblock_fsb;
 406         bool                            op_ok;
 407         struct xfs_cud_log_item         *cudp;
 408         struct xfs_trans                *tp;
 409         struct xfs_btree_cur            *rcur = NULL;
 410         enum xfs_refcount_intent_type   type;
 411         xfs_fsblock_t                   new_fsb;
 412         xfs_extlen_t                    new_len;
 413         struct xfs_bmbt_irec            irec;
 414         bool                            requeue_only = false;
 415
 416         ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
 417
 418         /*
 419          * First check the validity of the extents described by the
 420          * CUI.  If any are bad, then assume that all are bad and
 421          * just toss the CUI.
 422          */
 423         for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
 424                 refc = &cuip->cui_format.cui_extents[i];
 425                 startblock_fsb = XFS_BB_TO_FSB(mp,
 426                                    XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
 427                 switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
 428                 case XFS_REFCOUNT_INCREASE:
 429                 case XFS_REFCOUNT_DECREASE:
 430                 case XFS_REFCOUNT_ALLOC_COW:
 431                 case XFS_REFCOUNT_FREE_COW:
 432                         op_ok = true;
 433                         break;
 434                 default:
 435                         op_ok = false;
 436                         break;
 437                 }
 438                 if (!op_ok || startblock_fsb == 0 ||
 439                     refc->pe_len == 0 ||
 440                     startblock_fsb >= mp->m_sb.sb_dblocks ||
 441                     refc->pe_len >= mp->m_sb.sb_agblocks ||
 442                     (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
 443                         /*
 444                          * This will pull the CUI from the AIL and
 445                          * free the memory associated with it.
 446                          */
 447                         set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
 448                         xfs_cui_release(cuip);
 449                         return -EIO;
 450                 }
 451         }
 452
 453         /*
 454          * Under normal operation, refcount updates are deferred, so we
 455          * wouldn't be adding them directly to a transaction.  All
 456          * refcount updates manage reservation usage internally and
 457          * dynamically by deferring work that won't fit in the
 458          * transaction.  Normally, any work that needs to be deferred
 459          * gets attached to the same defer_ops that scheduled the
 460          * refcount update.  However, we're in log recovery here, so we
 461          * we use the passed in defer_ops and to finish up any work that
 462          * doesn't fit.  We need to reserve enough blocks to handle a
 463          * full btree split on either end of the refcount range.
 464          */
 465         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
 466                         mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
 467         if (error)
 468                 return error;
 469         cudp = xfs_trans_get_cud(tp, cuip);
 470
 471         for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
 472                 refc = &cuip->cui_format.cui_extents[i];
 473                 refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
 474                 switch (refc_type) {
 475                 case XFS_REFCOUNT_INCREASE:
 476                 case XFS_REFCOUNT_DECREASE:
 477                 case XFS_REFCOUNT_ALLOC_COW:
 478                 case XFS_REFCOUNT_FREE_COW:
 479                         type = refc_type;
 480                         break;
 481                 default:
 482                         error = -EFSCORRUPTED;
 483                         goto abort_error;
 484                 }
 485                 if (requeue_only) {
 486                         new_fsb = refc->pe_startblock;
 487                         new_len = refc->pe_len;
 488                 } else
 489                         error = xfs_trans_log_finish_refcount_update(tp, cudp,
 490                                 dfops, type, refc->pe_startblock, refc->pe_len,
 491                                 &new_fsb, &new_len, &rcur);
 492                 if (error)
 493                         goto abort_error;
 494
 495                 /* Requeue what we didn't finish. */
 496                 if (new_len > 0) {
 497                         irec.br_startblock = new_fsb;
 498                         irec.br_blockcount = new_len;
 499                         switch (type) {
 500                         case XFS_REFCOUNT_INCREASE:
 501                                 error = xfs_refcount_increase_extent(
 502                                                 tp->t_mountp, dfops, &irec);
 503                                 break;
 504                         case XFS_REFCOUNT_DECREASE:
 505                                 error = xfs_refcount_decrease_extent(
 506                                                 tp->t_mountp, dfops, &irec);
 507                                 break;
 508                         case XFS_REFCOUNT_ALLOC_COW:
 509                                 error = xfs_refcount_alloc_cow_extent(
 510                                                 tp->t_mountp, dfops,
 511                                                 irec.br_startblock,
 512                                                 irec.br_blockcount);
 513                                 break;
 514                         case XFS_REFCOUNT_FREE_COW:
 515                                 error = xfs_refcount_free_cow_extent(
 516                                                 tp->t_mountp, dfops,
 517                                                 irec.br_startblock,
 518                                                 irec.br_blockcount);
 519                                 break;
 520                         default:
 521                                 ASSERT(0);
 522                         }
 523                         if (error)
 524                                 goto abort_error;
 525                         requeue_only = true;
 526                 }
 527         }
 528
 529         xfs_refcount_finish_one_cleanup(tp, rcur, error);
 530         set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
 531         error = xfs_trans_commit(tp);
 532         return error;
 533
 534 abort_error:
 535         xfs_refcount_finish_one_cleanup(tp, rcur, error);
 536         xfs_trans_cancel(tp);
 537         return error;
 538 }