xfs: teach online scrub to find directory tree structure problems
[linux-2.6-block.git] / fs / xfs / scrub / scrub.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_quota.h"
16 #include "xfs_qm.h"
17 #include "xfs_scrub.h"
18 #include "xfs_buf_mem.h"
19 #include "xfs_rmap.h"
20 #include "xfs_exchrange.h"
21 #include "xfs_exchmaps.h"
22 #include "xfs_dir2.h"
23 #include "xfs_parent.h"
24 #include "scrub/scrub.h"
25 #include "scrub/common.h"
26 #include "scrub/trace.h"
27 #include "scrub/repair.h"
28 #include "scrub/health.h"
29 #include "scrub/stats.h"
30 #include "scrub/xfile.h"
31 #include "scrub/tempfile.h"
32 #include "scrub/orphanage.h"
33
34 /*
35  * Online Scrub and Repair
36  *
37  * Traditionally, XFS (the kernel driver) did not know how to check or
38  * repair on-disk data structures.  That task was left to the xfs_check
39  * and xfs_repair tools, both of which require taking the filesystem
40  * offline for a thorough but time consuming examination.  Online
41  * scrub & repair, on the other hand, enables us to check the metadata
42  * for obvious errors while carefully stepping around the filesystem's
43  * ongoing operations, locking rules, etc.
44  *
45  * Given that most XFS metadata consist of records stored in a btree,
46  * most of the checking functions iterate the btree blocks themselves
47  * looking for irregularities.  When a record block is encountered, each
48  * record can be checked for obviously bad values.  Record values can
49  * also be cross-referenced against other btrees to look for potential
50  * misunderstandings between pieces of metadata.
51  *
52  * It is expected that the checkers responsible for per-AG metadata
53  * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
54  * metadata structure, and perform any relevant cross-referencing before
55  * unlocking the AG and returning the results to userspace.  These
56  * scrubbers must not keep an AG locked for too long to avoid tying up
57  * the block and inode allocators.
58  *
59  * Block maps and b-trees rooted in an inode present a special challenge
60  * because they can involve extents from any AG.  The general scrubber
61  * structure of lock -> check -> xref -> unlock still holds, but AG
62  * locking order rules /must/ be obeyed to avoid deadlocks.  The
63  * ordering rule, of course, is that we must lock in increasing AG
64  * order.  Helper functions are provided to track which AG headers we've
65  * already locked.  If we detect an imminent locking order violation, we
66  * can signal a potential deadlock, in which case the scrubber can jump
67  * out to the top level, lock all the AGs in order, and retry the scrub.
68  *
69  * For file data (directories, extended attributes, symlinks) scrub, we
70  * can simply lock the inode and walk the data.  For btree data
71  * (directories and attributes) we follow the same btree-scrubbing
72  * strategy outlined previously to check the records.
73  *
74  * We use a bit of trickery with transactions to avoid buffer deadlocks
75  * if there is a cycle in the metadata.  The basic problem is that
76  * travelling down a btree involves locking the current buffer at each
77  * tree level.  If a pointer should somehow point back to a buffer that
78  * we've already examined, we will deadlock due to the second buffer
79  * locking attempt.  Note however that grabbing a buffer in transaction
80  * context links the locked buffer to the transaction.  If we try to
81  * re-grab the buffer in the context of the same transaction, we avoid
82  * the second lock attempt and continue.  Between the verifier and the
83  * scrubber, something will notice that something is amiss and report
84  * the corruption.  Therefore, each scrubber will allocate an empty
85  * transaction, attach buffers to it, and cancel the transaction at the
86  * end of the scrub run.  Cancelling a non-dirty transaction simply
87  * unlocks the buffers.
88  *
89  * There are four pieces of data that scrub can communicate to
90  * userspace.  The first is the error code (errno), which can be used to
91  * communicate operational errors in performing the scrub.  There are
92  * also three flags that can be set in the scrub context.  If the data
93  * structure itself is corrupt, the CORRUPT flag will be set.  If
94  * the metadata is correct but otherwise suboptimal, the PREEN flag
95  * will be set.
96  *
97  * We perform secondary validation of filesystem metadata by
98  * cross-referencing every record with all other available metadata.
99  * For example, for block mapping extents, we verify that there are no
100  * records in the free space and inode btrees corresponding to that
101  * space extent and that there is a corresponding entry in the reverse
102  * mapping btree.  Inconsistent metadata is noted by setting the
103  * XCORRUPT flag; btree query function errors are noted by setting the
104  * XFAIL flag and deleting the cursor to prevent further attempts to
105  * cross-reference with a defective btree.
106  *
107  * If a piece of metadata proves corrupt or suboptimal, the userspace
108  * program can ask the kernel to apply some tender loving care (TLC) to
109  * the metadata object by setting the REPAIR flag and re-calling the
110  * scrub ioctl.  "Corruption" is defined by metadata violating the
111  * on-disk specification; operations cannot continue if the violation is
112  * left untreated.  It is possible for XFS to continue if an object is
113  * "suboptimal", however performance may be degraded.  Repairs are
114  * usually performed by rebuilding the metadata entirely out of
115  * redundant metadata.  Optimizing, on the other hand, can sometimes be
116  * done without rebuilding entire structures.
117  *
118  * Generally speaking, the repair code has the following code structure:
119  * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
120  * The first check helps us figure out if we need to rebuild or simply
121  * optimize the structure so that the rebuild knows what to do.  The
122  * second check evaluates the completeness of the repair; that is what
123  * is reported to userspace.
124  *
125  * A quick note on symbol prefixes:
126  * - "xfs_" are general XFS symbols.
127  * - "xchk_" are symbols related to metadata checking.
128  * - "xrep_" are symbols related to metadata repair.
129  * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS.
130  */
131
/*
 * Scrub probe -- the do-nothing scrubber that userspace (xfs_scrub) calls to
 * learn whether this kernel is willing to scrub or repair a given
 * mountpoint.  The dispatcher validates the ioctl inputs and prepares the
 * filesystem for a scrub (or repair) operation before calling us, so all
 * that is left to do here is check whether the operation should be
 * terminated and return to userspace right away.  Userspace can then use the
 * returned errno and structure state to decide, in broad terms, whether
 * scrub/repair are supported by the running kernel.
 *
 * Returns the error reported through xchk_should_terminate() when it tells
 * us to stop, or 0 otherwise.
 */
static int
xchk_probe(
	struct xfs_scrub	*sc)
{
	int			ret = 0;

	return xchk_should_terminate(sc, &ret) ? ret : 0;
}
153
154 /* Scrub setup and teardown */
155
156 #define FSGATES_MASK    (XCHK_FSGATES_ALL | XREP_FSGATES_ALL)
157 static inline void
158 xchk_fsgates_disable(
159         struct xfs_scrub        *sc)
160 {
161         if (!(sc->flags & FSGATES_MASK))
162                 return;
163
164         trace_xchk_fsgates_disable(sc, sc->flags & FSGATES_MASK);
165
166         if (sc->flags & XCHK_FSGATES_DRAIN)
167                 xfs_drain_wait_disable();
168
169         if (sc->flags & XCHK_FSGATES_QUOTA)
170                 xfs_dqtrx_hook_disable();
171
172         if (sc->flags & XCHK_FSGATES_DIRENTS)
173                 xfs_dir_hook_disable();
174
175         if (sc->flags & XCHK_FSGATES_RMAP)
176                 xfs_rmap_hook_disable();
177
178         sc->flags &= ~FSGATES_MASK;
179 }
180 #undef FSGATES_MASK
181
182 /* Free the resources associated with a scrub subtype. */
183 void
184 xchk_scrub_free_subord(
185         struct xfs_scrub_subord *sub)
186 {
187         struct xfs_scrub        *sc = sub->parent_sc;
188
189         ASSERT(sc->ip == sub->sc.ip);
190         ASSERT(sc->orphanage == sub->sc.orphanage);
191         ASSERT(sc->tempip == sub->sc.tempip);
192
193         sc->sm->sm_type = sub->old_smtype;
194         sc->sm->sm_flags = sub->old_smflags |
195                                 (sc->sm->sm_flags & XFS_SCRUB_FLAGS_OUT);
196         sc->tp = sub->sc.tp;
197
198         if (sub->sc.buf) {
199                 if (sub->sc.buf_cleanup)
200                         sub->sc.buf_cleanup(sub->sc.buf);
201                 kvfree(sub->sc.buf);
202         }
203         if (sub->sc.xmbtp)
204                 xmbuf_free(sub->sc.xmbtp);
205         if (sub->sc.xfile)
206                 xfile_destroy(sub->sc.xfile);
207
208         sc->ilock_flags = sub->sc.ilock_flags;
209         sc->orphanage_ilock_flags = sub->sc.orphanage_ilock_flags;
210         sc->temp_ilock_flags = sub->sc.temp_ilock_flags;
211
212         kfree(sub);
213 }
214
215 /* Free all the resources and finish the transactions. */
216 STATIC int
217 xchk_teardown(
218         struct xfs_scrub        *sc,
219         int                     error)
220 {
221         xchk_ag_free(sc, &sc->sa);
222         if (sc->tp) {
223                 if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
224                         error = xfs_trans_commit(sc->tp);
225                 else
226                         xfs_trans_cancel(sc->tp);
227                 sc->tp = NULL;
228         }
229         if (sc->ip) {
230                 if (sc->ilock_flags)
231                         xchk_iunlock(sc, sc->ilock_flags);
232                 xchk_irele(sc, sc->ip);
233                 sc->ip = NULL;
234         }
235         if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
236                 sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
237                 mnt_drop_write_file(sc->file);
238         }
239         if (sc->xmbtp) {
240                 xmbuf_free(sc->xmbtp);
241                 sc->xmbtp = NULL;
242         }
243         if (sc->xfile) {
244                 xfile_destroy(sc->xfile);
245                 sc->xfile = NULL;
246         }
247         if (sc->buf) {
248                 if (sc->buf_cleanup)
249                         sc->buf_cleanup(sc->buf);
250                 kvfree(sc->buf);
251                 sc->buf_cleanup = NULL;
252                 sc->buf = NULL;
253         }
254
255         xrep_tempfile_rele(sc);
256         xrep_orphanage_rele(sc);
257         xchk_fsgates_disable(sc);
258         return error;
259 }
260
261 /* Scrubbing dispatch. */
262
263 static const struct xchk_meta_ops meta_scrub_ops[] = {
264         [XFS_SCRUB_TYPE_PROBE] = {      /* ioctl presence test */
265                 .type   = ST_NONE,
266                 .setup  = xchk_setup_fs,
267                 .scrub  = xchk_probe,
268                 .repair = xrep_probe,
269         },
270         [XFS_SCRUB_TYPE_SB] = {         /* superblock */
271                 .type   = ST_PERAG,
272                 .setup  = xchk_setup_agheader,
273                 .scrub  = xchk_superblock,
274                 .repair = xrep_superblock,
275         },
276         [XFS_SCRUB_TYPE_AGF] = {        /* agf */
277                 .type   = ST_PERAG,
278                 .setup  = xchk_setup_agheader,
279                 .scrub  = xchk_agf,
280                 .repair = xrep_agf,
281         },
282         [XFS_SCRUB_TYPE_AGFL]= {        /* agfl */
283                 .type   = ST_PERAG,
284                 .setup  = xchk_setup_agheader,
285                 .scrub  = xchk_agfl,
286                 .repair = xrep_agfl,
287         },
288         [XFS_SCRUB_TYPE_AGI] = {        /* agi */
289                 .type   = ST_PERAG,
290                 .setup  = xchk_setup_agheader,
291                 .scrub  = xchk_agi,
292                 .repair = xrep_agi,
293         },
294         [XFS_SCRUB_TYPE_BNOBT] = {      /* bnobt */
295                 .type   = ST_PERAG,
296                 .setup  = xchk_setup_ag_allocbt,
297                 .scrub  = xchk_allocbt,
298                 .repair = xrep_allocbt,
299                 .repair_eval = xrep_revalidate_allocbt,
300         },
301         [XFS_SCRUB_TYPE_CNTBT] = {      /* cntbt */
302                 .type   = ST_PERAG,
303                 .setup  = xchk_setup_ag_allocbt,
304                 .scrub  = xchk_allocbt,
305                 .repair = xrep_allocbt,
306                 .repair_eval = xrep_revalidate_allocbt,
307         },
308         [XFS_SCRUB_TYPE_INOBT] = {      /* inobt */
309                 .type   = ST_PERAG,
310                 .setup  = xchk_setup_ag_iallocbt,
311                 .scrub  = xchk_iallocbt,
312                 .repair = xrep_iallocbt,
313                 .repair_eval = xrep_revalidate_iallocbt,
314         },
315         [XFS_SCRUB_TYPE_FINOBT] = {     /* finobt */
316                 .type   = ST_PERAG,
317                 .setup  = xchk_setup_ag_iallocbt,
318                 .scrub  = xchk_iallocbt,
319                 .has    = xfs_has_finobt,
320                 .repair = xrep_iallocbt,
321                 .repair_eval = xrep_revalidate_iallocbt,
322         },
323         [XFS_SCRUB_TYPE_RMAPBT] = {     /* rmapbt */
324                 .type   = ST_PERAG,
325                 .setup  = xchk_setup_ag_rmapbt,
326                 .scrub  = xchk_rmapbt,
327                 .has    = xfs_has_rmapbt,
328                 .repair = xrep_rmapbt,
329         },
330         [XFS_SCRUB_TYPE_REFCNTBT] = {   /* refcountbt */
331                 .type   = ST_PERAG,
332                 .setup  = xchk_setup_ag_refcountbt,
333                 .scrub  = xchk_refcountbt,
334                 .has    = xfs_has_reflink,
335                 .repair = xrep_refcountbt,
336         },
337         [XFS_SCRUB_TYPE_INODE] = {      /* inode record */
338                 .type   = ST_INODE,
339                 .setup  = xchk_setup_inode,
340                 .scrub  = xchk_inode,
341                 .repair = xrep_inode,
342         },
343         [XFS_SCRUB_TYPE_BMBTD] = {      /* inode data fork */
344                 .type   = ST_INODE,
345                 .setup  = xchk_setup_inode_bmap,
346                 .scrub  = xchk_bmap_data,
347                 .repair = xrep_bmap_data,
348         },
349         [XFS_SCRUB_TYPE_BMBTA] = {      /* inode attr fork */
350                 .type   = ST_INODE,
351                 .setup  = xchk_setup_inode_bmap,
352                 .scrub  = xchk_bmap_attr,
353                 .repair = xrep_bmap_attr,
354         },
355         [XFS_SCRUB_TYPE_BMBTC] = {      /* inode CoW fork */
356                 .type   = ST_INODE,
357                 .setup  = xchk_setup_inode_bmap,
358                 .scrub  = xchk_bmap_cow,
359                 .repair = xrep_bmap_cow,
360         },
361         [XFS_SCRUB_TYPE_DIR] = {        /* directory */
362                 .type   = ST_INODE,
363                 .setup  = xchk_setup_directory,
364                 .scrub  = xchk_directory,
365                 .repair = xrep_directory,
366         },
367         [XFS_SCRUB_TYPE_XATTR] = {      /* extended attributes */
368                 .type   = ST_INODE,
369                 .setup  = xchk_setup_xattr,
370                 .scrub  = xchk_xattr,
371                 .repair = xrep_xattr,
372         },
373         [XFS_SCRUB_TYPE_SYMLINK] = {    /* symbolic link */
374                 .type   = ST_INODE,
375                 .setup  = xchk_setup_symlink,
376                 .scrub  = xchk_symlink,
377                 .repair = xrep_symlink,
378         },
379         [XFS_SCRUB_TYPE_PARENT] = {     /* parent pointers */
380                 .type   = ST_INODE,
381                 .setup  = xchk_setup_parent,
382                 .scrub  = xchk_parent,
383                 .repair = xrep_parent,
384         },
385         [XFS_SCRUB_TYPE_RTBITMAP] = {   /* realtime bitmap */
386                 .type   = ST_FS,
387                 .setup  = xchk_setup_rtbitmap,
388                 .scrub  = xchk_rtbitmap,
389                 .repair = xrep_rtbitmap,
390         },
391         [XFS_SCRUB_TYPE_RTSUM] = {      /* realtime summary */
392                 .type   = ST_FS,
393                 .setup  = xchk_setup_rtsummary,
394                 .scrub  = xchk_rtsummary,
395                 .repair = xrep_rtsummary,
396         },
397         [XFS_SCRUB_TYPE_UQUOTA] = {     /* user quota */
398                 .type   = ST_FS,
399                 .setup  = xchk_setup_quota,
400                 .scrub  = xchk_quota,
401                 .repair = xrep_quota,
402         },
403         [XFS_SCRUB_TYPE_GQUOTA] = {     /* group quota */
404                 .type   = ST_FS,
405                 .setup  = xchk_setup_quota,
406                 .scrub  = xchk_quota,
407                 .repair = xrep_quota,
408         },
409         [XFS_SCRUB_TYPE_PQUOTA] = {     /* project quota */
410                 .type   = ST_FS,
411                 .setup  = xchk_setup_quota,
412                 .scrub  = xchk_quota,
413                 .repair = xrep_quota,
414         },
415         [XFS_SCRUB_TYPE_FSCOUNTERS] = { /* fs summary counters */
416                 .type   = ST_FS,
417                 .setup  = xchk_setup_fscounters,
418                 .scrub  = xchk_fscounters,
419                 .repair = xrep_fscounters,
420         },
421         [XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */
422                 .type   = ST_FS,
423                 .setup  = xchk_setup_quotacheck,
424                 .scrub  = xchk_quotacheck,
425                 .repair = xrep_quotacheck,
426         },
427         [XFS_SCRUB_TYPE_NLINKS] = {     /* inode link counts */
428                 .type   = ST_FS,
429                 .setup  = xchk_setup_nlinks,
430                 .scrub  = xchk_nlinks,
431                 .repair = xrep_nlinks,
432         },
433         [XFS_SCRUB_TYPE_HEALTHY] = {    /* fs healthy; clean all reminders */
434                 .type   = ST_FS,
435                 .setup  = xchk_setup_fs,
436                 .scrub  = xchk_health_record,
437                 .repair = xrep_notsupported,
438         },
439         [XFS_SCRUB_TYPE_DIRTREE] = {    /* directory tree structure */
440                 .type   = ST_INODE,
441                 .setup  = xchk_setup_dirtree,
442                 .scrub  = xchk_dirtree,
443                 .has    = xfs_has_parent,
444                 .repair = xrep_notsupported,
445         },
446 };
447
448 static int
449 xchk_validate_inputs(
450         struct xfs_mount                *mp,
451         struct xfs_scrub_metadata       *sm)
452 {
453         int                             error;
454         const struct xchk_meta_ops      *ops;
455
456         error = -EINVAL;
457         /* Check our inputs. */
458         sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
459         if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
460                 goto out;
461         /* sm_reserved[] must be zero */
462         if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
463                 goto out;
464
465         error = -ENOENT;
466         /* Do we know about this type of metadata? */
467         if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
468                 goto out;
469         ops = &meta_scrub_ops[sm->sm_type];
470         if (ops->setup == NULL || ops->scrub == NULL)
471                 goto out;
472         /* Does this fs even support this type of metadata? */
473         if (ops->has && !ops->has(mp))
474                 goto out;
475
476         error = -EINVAL;
477         /* restricting fields must be appropriate for type */
478         switch (ops->type) {
479         case ST_NONE:
480         case ST_FS:
481                 if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
482                         goto out;
483                 break;
484         case ST_PERAG:
485                 if (sm->sm_ino || sm->sm_gen ||
486                     sm->sm_agno >= mp->m_sb.sb_agcount)
487                         goto out;
488                 break;
489         case ST_INODE:
490                 if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
491                         goto out;
492                 break;
493         default:
494                 goto out;
495         }
496
497         /* No rebuild without repair. */
498         if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) &&
499             !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
500                 return -EINVAL;
501
502         /*
503          * We only want to repair read-write v5+ filesystems.  Defer the check
504          * for ops->repair until after our scrub confirms that we need to
505          * perform repairs so that we avoid failing due to not supporting
506          * repairing an object that doesn't need repairs.
507          */
508         if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
509                 error = -EOPNOTSUPP;
510                 if (!xfs_has_crc(mp))
511                         goto out;
512
513                 error = -EROFS;
514                 if (xfs_is_readonly(mp))
515                         goto out;
516         }
517
518         error = 0;
519 out:
520         return error;
521 }
522
523 #ifdef CONFIG_XFS_ONLINE_REPAIR
524 static inline void xchk_postmortem(struct xfs_scrub *sc)
525 {
526         /*
527          * Userspace asked us to repair something, we repaired it, rescanned
528          * it, and the rescan says it's still broken.  Scream about this in
529          * the system logs.
530          */
531         if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
532             (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
533                                  XFS_SCRUB_OFLAG_XCORRUPT)))
534                 xrep_failure(sc->mp);
535 }
536 #else
537 static inline void xchk_postmortem(struct xfs_scrub *sc)
538 {
539         /*
540          * Userspace asked us to scrub something, it's broken, and we have no
541          * way of fixing it.  Scream in the logs.
542          */
543         if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
544                                 XFS_SCRUB_OFLAG_XCORRUPT))
545                 xfs_alert_ratelimited(sc->mp,
546                                 "Corruption detected during scrub.");
547 }
548 #endif /* CONFIG_XFS_ONLINE_REPAIR */
549
550 /*
551  * Create a new scrub context from an existing one, but with a different scrub
552  * type.
553  */
554 struct xfs_scrub_subord *
555 xchk_scrub_create_subord(
556         struct xfs_scrub        *sc,
557         unsigned int            subtype)
558 {
559         struct xfs_scrub_subord *sub;
560
561         sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS);
562         if (!sub)
563                 return ERR_PTR(-ENOMEM);
564
565         sub->old_smtype = sc->sm->sm_type;
566         sub->old_smflags = sc->sm->sm_flags;
567         sub->parent_sc = sc;
568         memcpy(&sub->sc, sc, sizeof(struct xfs_scrub));
569         sub->sc.ops = &meta_scrub_ops[subtype];
570         sub->sc.sm->sm_type = subtype;
571         sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
572         sub->sc.buf = NULL;
573         sub->sc.buf_cleanup = NULL;
574         sub->sc.xfile = NULL;
575         sub->sc.xmbtp = NULL;
576
577         return sub;
578 }
579
580 /* Dispatch metadata scrubbing. */
581 int
582 xfs_scrub_metadata(
583         struct file                     *file,
584         struct xfs_scrub_metadata       *sm)
585 {
586         struct xchk_stats_run           run = { };
587         struct xfs_scrub                *sc;
588         struct xfs_mount                *mp = XFS_I(file_inode(file))->i_mount;
589         u64                             check_start;
590         int                             error = 0;
591
592         BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
593                 (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
594
595         trace_xchk_start(XFS_I(file_inode(file)), sm, error);
596
597         /* Forbidden if we are shut down or mounted norecovery. */
598         error = -ESHUTDOWN;
599         if (xfs_is_shutdown(mp))
600                 goto out;
601         error = -ENOTRECOVERABLE;
602         if (xfs_has_norecovery(mp))
603                 goto out;
604
605         error = xchk_validate_inputs(mp, sm);
606         if (error)
607                 goto out;
608
609         xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
610  "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
611
612         sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
613         if (!sc) {
614                 error = -ENOMEM;
615                 goto out;
616         }
617
618         sc->mp = mp;
619         sc->file = file;
620         sc->sm = sm;
621         sc->ops = &meta_scrub_ops[sm->sm_type];
622         sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
623 retry_op:
624         /*
625          * When repairs are allowed, prevent freezing or readonly remount while
626          * scrub is running with a real transaction.
627          */
628         if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
629                 error = mnt_want_write_file(sc->file);
630                 if (error)
631                         goto out_sc;
632
633                 sc->flags |= XCHK_HAVE_FREEZE_PROT;
634         }
635
636         /* Set up for the operation. */
637         error = sc->ops->setup(sc);
638         if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
639                 goto try_harder;
640         if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
641                 goto need_drain;
642         if (error)
643                 goto out_teardown;
644
645         /* Scrub for errors. */
646         check_start = xchk_stats_now();
647         if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
648                 error = sc->ops->repair_eval(sc);
649         else
650                 error = sc->ops->scrub(sc);
651         run.scrub_ns += xchk_stats_elapsed_ns(check_start);
652         if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
653                 goto try_harder;
654         if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
655                 goto need_drain;
656         if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
657                 goto out_teardown;
658
659         xchk_update_health(sc);
660
661         if (xchk_could_repair(sc)) {
662                 /*
663                  * If userspace asked for a repair but it wasn't necessary,
664                  * report that back to userspace.
665                  */
666                 if (!xrep_will_attempt(sc)) {
667                         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
668                         goto out_nofix;
669                 }
670
671                 /*
672                  * If it's broken, userspace wants us to fix it, and we haven't
673                  * already tried to fix it, then attempt a repair.
674                  */
675                 error = xrep_attempt(sc, &run);
676                 if (error == -EAGAIN) {
677                         /*
678                          * Either the repair function succeeded or it couldn't
679                          * get all the resources it needs; either way, we go
680                          * back to the beginning and call the scrub function.
681                          */
682                         error = xchk_teardown(sc, 0);
683                         if (error) {
684                                 xrep_failure(mp);
685                                 goto out_sc;
686                         }
687                         goto retry_op;
688                 }
689         }
690
691 out_nofix:
692         xchk_postmortem(sc);
693 out_teardown:
694         error = xchk_teardown(sc, error);
695 out_sc:
696         if (error != -ENOENT)
697                 xchk_stats_merge(mp, sm, &run);
698         kfree(sc);
699 out:
700         trace_xchk_done(XFS_I(file_inode(file)), sm, error);
701         if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
702                 sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
703                 error = 0;
704         }
705         return error;
706 need_drain:
707         error = xchk_teardown(sc, 0);
708         if (error)
709                 goto out_sc;
710         sc->flags |= XCHK_NEED_DRAIN;
711         run.retries++;
712         goto retry_op;
713 try_harder:
714         /*
715          * Scrubbers return -EDEADLOCK to mean 'try harder'.  Tear down
716          * everything we hold, then set up again with preparation for
717          * worst-case scenarios.
718          */
719         error = xchk_teardown(sc, 0);
720         if (error)
721                 goto out_sc;
722         sc->flags |= XCHK_TRY_HARDER;
723         run.retries++;
724         goto retry_op;
725 }