xfs: use dontcache for grabbing inodes during scrub
fs/xfs/scrub/scrub.h

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_SCRUB_H__
#define __XFS_SCRUB_SCRUB_H__

struct xfs_scrub;

struct xchk_relax {
	unsigned long		next_resched;
	unsigned int		resched_nr;
	bool			interruptible;
};

/* Yield to the scheduler at most 10x per second. */
#define XCHK_RELAX_NEXT		(jiffies + (HZ / 10))

#define INIT_XCHK_RELAX \
	(struct xchk_relax){ \
		.next_resched	= XCHK_RELAX_NEXT, \
		.resched_nr	= 0, \
		.interruptible	= true, \
	}

/*
 * Relax during a scrub operation and exit if there's a fatal signal pending.
 *
 * If preemption is disabled, we need to yield to the scheduler every now and
 * then so that we don't run afoul of the soft lockup watchdog or RCU stall
 * detector.  cond_resched calls are somewhat expensive (~5ns) so we want to
 * ratelimit this to 10x per second.  Amortize the cost of the other checks by
 * only doing it once every 100 calls.
 */
static inline int xchk_maybe_relax(struct xchk_relax *widget)
{
	/* Amortize the cost of scheduling and checking signals. */
	if (likely(++widget->resched_nr < 100))
		return 0;
	widget->resched_nr = 0;

	if (unlikely(widget->next_resched <= jiffies)) {
		cond_resched();
		widget->next_resched = XCHK_RELAX_NEXT;
	}

	if (widget->interruptible && fatal_signal_pending(current))
		return -EINTR;

	return 0;
}

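/*
 * A minimal usage sketch (hypothetical caller; has_more_work() and
 * do_one_check() are made-up stand-ins for a scrubber's work loop):
 *
 *	struct xchk_relax	relax = INIT_XCHK_RELAX;
 *	int			error;
 *
 *	while (has_more_work()) {
 *		error = xchk_maybe_relax(&relax);
 *		if (error)
 *			return error;
 *		do_one_check();
 *	}
 */
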
/*
 * Standard flags for allocating memory within scrub.  NOFS context is
 * configured by the process allocation scope.  Scrub and repair must be able
 * to back out gracefully if there isn't enough memory.  Force-cast to avoid
 * complaints from static checkers.
 */
#define XCHK_GFP_FLAGS	((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
					 __GFP_RETRY_MAYFAIL))

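/*
 * A usage sketch (hypothetical; it illustrates that callers must handle
 * allocation failure themselves, since these flags suppress warnings and
 * allow the allocator to give up):
 *
 *	buf = kvmalloc(bufsize, XCHK_GFP_FLAGS);
 *	if (!buf)
 *		return -ENOMEM;
 */
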
/*
 * For opening files by handle for fsck operations, we don't trust the inumber
 * or the allocation state; therefore, perform an untrusted lookup.  We don't
 * want these inodes to pollute the cache, so mark them for immediate removal.
 */
#define XCHK_IGET_FLAGS	(XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE)

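/*
 * A lookup sketch (hypothetical; the exact xfs_iget argument list is an
 * assumption here, not something this header defines):
 *
 *	error = xfs_iget(mp, tp, sc->sm->sm_ino, XCHK_IGET_FLAGS, 0, &ip);
 */
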
/* Type info and names for the scrub types. */
enum xchk_type {
	ST_NONE = 1,	/* disabled */
	ST_PERAG,	/* per-AG metadata */
	ST_FS,		/* per-FS metadata */
	ST_INODE,	/* per-inode metadata */
};

struct xchk_meta_ops {
	/* Acquire whatever resources are needed for the operation. */
	int		(*setup)(struct xfs_scrub *sc);

	/* Examine metadata for errors. */
	int		(*scrub)(struct xfs_scrub *sc);

	/* Repair or optimize the metadata. */
	int		(*repair)(struct xfs_scrub *sc);

	/*
	 * Re-scrub the metadata we repaired, in case there's extra work that
	 * we need to do to check our repair work.  If this is NULL, we'll use
	 * the ->scrub function pointer, assuming that the regular scrub is
	 * sufficient.
	 */
	int		(*repair_eval)(struct xfs_scrub *sc);

	/* Decide if we even have this piece of metadata. */
	bool		(*has)(struct xfs_mount *mp);

	/* Type describing required/allowed inputs. */
	enum xchk_type	type;
};

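/*
 * A dispatch-table sketch (hypothetical; xchk_setup_foo and xchk_foo are
 * invented names, not real scrubbers):
 *
 *	static const struct xchk_meta_ops xchk_foo_ops = {
 *		.type	= ST_PERAG,
 *		.setup	= xchk_setup_foo,
 *		.scrub	= xchk_foo,
 *	};
 *
 * Leaving .repair_eval NULL means a post-repair check falls back to
 * ->scrub, per the comment above.
 */
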
/* Buffer pointers and btree cursors for an entire AG. */
struct xchk_ag {
	struct xfs_perag	*pag;

	/* AG btree roots */
	struct xfs_buf		*agf_bp;
	struct xfs_buf		*agi_bp;

	/* AG btrees */
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur;
	struct xfs_btree_cur	*rmap_cur;
	struct xfs_btree_cur	*refc_cur;
};

struct xfs_scrub {
	/* General scrub state. */
	struct xfs_mount		*mp;
	struct xfs_scrub_metadata	*sm;
	const struct xchk_meta_ops	*ops;
	struct xfs_trans		*tp;

	/* File that scrub was called with. */
	struct file			*file;

	/*
	 * File that is undergoing the scrub operation.  This can differ from
	 * the file that scrub was called with if we're checking file-based fs
	 * metadata (e.g. rt bitmaps) or if we're doing a scrub-by-handle for
	 * something that can't be opened directly (e.g. symlinks).
	 */
	struct xfs_inode		*ip;

	/* Kernel memory buffer used by scrubbers; freed at teardown. */
	void				*buf;

	/*
	 * Clean up resources owned by whatever is in the buffer.  Cleanup can
	 * be deferred with this hook as a means for scrub functions to pass
	 * data to repair functions.  This function must not free the buffer
	 * itself.
	 */
	void				(*buf_cleanup)(void *buf);

	/* xfile used by the scrubbers; freed at teardown. */
	struct xfile			*xfile;

	/* Buffer target for in-memory btrees; also freed at teardown. */
	struct xfs_buftarg		*xmbtp;

	/* Lock flags for @ip. */
	uint				ilock_flags;

	/* The orphanage, for stashing files that have lost their parent. */
	uint				orphanage_ilock_flags;
	struct xfs_inode		*orphanage;

	/* A temporary file on this filesystem, for staging new metadata. */
	struct xfs_inode		*tempip;
	uint				temp_ilock_flags;

	/* See the XCHK/XREP state flags below. */
	unsigned int			flags;

	/*
	 * The XFS_SICK_* flags that correspond to the metadata being scrubbed
	 * or repaired.  We will use this mask to update the in-core fs health
	 * status with whatever we find.
	 */
	unsigned int			sick_mask;

	/* Next time we want to cond_resched(). */
	struct xchk_relax		relax;

	/* State tracking for single-AG operations. */
	struct xchk_ag			sa;
};

/* XCHK state flags grow up from zero, XREP state flags grow down from 2^31 */
#define XCHK_TRY_HARDER		(1U << 0)  /* can't get resources, try again */
#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining enabled */
#define XCHK_NEED_DRAIN		(1U << 3)  /* scrub needs to drain defer ops */
#define XCHK_FSGATES_QUOTA	(1U << 4)  /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS	(1U << 5)  /* directory live update enabled */
#define XCHK_FSGATES_RMAP	(1U << 6)  /* rmapbt live update enabled */
#define XREP_FSGATES_EXCHANGE_RANGE	(1U << 29) /* uses file content exchange */
#define XREP_RESET_PERAG_RESV	(1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED	(1U << 31) /* checking our repair work */

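/*
 * A retry sketch (hypothetical; assumes the convention that a scrubber
 * which cannot get resources returns -EDEADLOCK to request a retry):
 *
 *	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER)) {
 *		sc->flags |= XCHK_TRY_HARDER;
 *		goto retry_op;
 *	}
 */
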
/*
 * The XCHK_FSGATES* flags reflect functionality in the main filesystem that
 * is enabled only for this particular online fsck.  When not in use, the
 * features are gated off via dynamic code patching, which is why the state
 * must be enabled during scrub setup and can only be torn down afterwards.
 */
#define XCHK_FSGATES_ALL	(XCHK_FSGATES_DRAIN | \
				 XCHK_FSGATES_QUOTA | \
				 XCHK_FSGATES_DIRENTS | \
				 XCHK_FSGATES_RMAP)

/*
 * The sole XREP_FSGATES* flag reflects a log intent item that is protected
 * by a log-incompat feature flag.  No code patching is in use here.
 */
#define XREP_FSGATES_ALL	(XREP_FSGATES_EXCHANGE_RANGE)

struct xfs_scrub_subord {
	struct xfs_scrub	sc;
	struct xfs_scrub	*parent_sc;
	unsigned int		old_smtype;
	unsigned int		old_smflags;
};

struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
		unsigned int subtype);
void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);

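/*
 * A subordinate-scrub sketch (hypothetical; XFS_SCRUB_TYPE_FOO is a
 * made-up subtype, and error handling is elided):
 *
 *	struct xfs_scrub_subord	*sub;
 *	int			error;
 *
 *	sub = xchk_scrub_create_subord(sc, XFS_SCRUB_TYPE_FOO);
 *	error = sub->sc.ops->scrub(&sub->sc);
 *	xchk_scrub_free_subord(sub);
 */
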
/*
 * We /could/ terminate a scrub/repair operation early.  If we're not
 * in a good place to continue (fatal signal, etc.) then bail out.
 * Note that we're careful not to make any judgements about *error.
 */
static inline bool
xchk_should_terminate(
	struct xfs_scrub	*sc,
	int			*error)
{
	if (xchk_maybe_relax(&sc->relax)) {
		if (*error == 0)
			*error = -EINTR;
		return true;
	}
	return false;
}

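/*
 * A caller sketch (hypothetical per-AG loop): any error the caller has
 * already recorded is preserved; only a clean *error is overwritten
 * with -EINTR.
 *
 *	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 *		if (xchk_should_terminate(sc, &error))
 *			break;
 *		...
 *	}
 */
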
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
int xchk_agf(struct xfs_scrub *sc);
int xchk_agfl(struct xfs_scrub *sc);
int xchk_agi(struct xfs_scrub *sc);
int xchk_allocbt(struct xfs_scrub *sc);
int xchk_iallocbt(struct xfs_scrub *sc);
int xchk_rmapbt(struct xfs_scrub *sc);
int xchk_refcountbt(struct xfs_scrub *sc);
int xchk_inode(struct xfs_scrub *sc);
int xchk_bmap_data(struct xfs_scrub *sc);
int xchk_bmap_attr(struct xfs_scrub *sc);
int xchk_bmap_cow(struct xfs_scrub *sc);
int xchk_directory(struct xfs_scrub *sc);
int xchk_xattr(struct xfs_scrub *sc);
int xchk_symlink(struct xfs_scrub *sc);
int xchk_parent(struct xfs_scrub *sc);
int xchk_dirtree(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
#else
static inline int
xchk_rtbitmap(struct xfs_scrub *sc)
{
	return -ENOENT;
}
static inline int
xchk_rtsummary(struct xfs_scrub *sc)
{
	return -ENOENT;
}
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
int xchk_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_quota(struct xfs_scrub *sc)
{
	return -ENOENT;
}
static inline int
xchk_quotacheck(struct xfs_scrub *sc)
{
	return -ENOENT;
}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
int xchk_nlinks(struct xfs_scrub *sc);

/* cross-referencing helpers */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_only_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
#ifdef CONFIG_XFS_RT
void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
		xfs_extlen_t len);
#else
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
#endif

#endif	/* __XFS_SCRUB_SCRUB_H__ */