Commit | Line | Data |
---|---|---|
8660c7b7 DW |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* | |
3 | * Copyright (c) 2021-2024 Oracle. All Rights Reserved. | |
4 | * Author: Darrick J. Wong <djwong@kernel.org> | |
5 | */ | |
6 | #include "xfs.h" | |
7 | #include "xfs_fs.h" | |
8 | #include "xfs_shared.h" | |
9 | #include "xfs_format.h" | |
10 | #include "xfs_trans_resv.h" | |
11 | #include "xfs_mount.h" | |
12 | #include "xfs_log_format.h" | |
13 | #include "xfs_trans.h" | |
14 | #include "xfs_inode.h" | |
15 | #include "xfs_btree.h" | |
16 | #include "xfs_ialloc.h" | |
17 | #include "xfs_ialloc_btree.h" | |
18 | #include "xfs_ag.h" | |
19 | #include "xfs_error.h" | |
20 | #include "xfs_bit.h" | |
21 | #include "xfs_icache.h" | |
22 | #include "scrub/scrub.h" | |
23 | #include "scrub/iscan.h" | |
24 | #include "scrub/common.h" | |
25 | #include "scrub/trace.h" | |
26 | ||
27 | /* | |
28 | * Live File Scan | |
29 | * ============== | |
30 | * | |
31 | * Live file scans walk every inode in a live filesystem. This is more or | |
32 | * less like a regular iwalk, except that when we're advancing the scan cursor, | |
33 | * we must ensure that inodes cannot be added or deleted anywhere between the | |
34 | * old cursor value and the new cursor value. If we're advancing the cursor | |
35 | * by one inode, the caller must hold that inode; if we're finding the next | |
36 | * inode to scan, we must grab the AGI and hold it until we've updated the | |
37 | * scan cursor. | |
38 | * | |
39 | * Callers are expected to use this code to scan all files in the filesystem to | |
40 | * construct a new metadata index of some kind. The scan races against other | |
41 | * live updates, which means there must be a provision to update the new index | |
42 | * when updates are made to inodes that have already been scanned. The iscan lock | |
43 | * can be used in live update hook code to stop the scan and protect this data | |
44 | * structure. | |
45 | * | |
46 | * To keep the new index up to date with other metadata updates being made to | |
47 | * the live filesystem, it is assumed that the caller will add hooks as needed | |
48 | * to be notified when a metadata update occurs. The inode scanner must tell | |
49 | * the hook code when an inode has been visited with xchk_iscan_mark_visited. | |
50 | * Hook functions can use xchk_iscan_want_live_update to decide if the | |
51 | * scanner's observations must be updated. | |
52 | */ | |
53 | ||
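As a concrete illustration of the calling convention described above, here is a minimal caller-side sketch (not part of iscan.c). The xchk_iscan_* calls are the ones defined in this file; xrep_foo_scan_inode() and the choice of XFS_ILOCK_EXCL locking are hypothetical and stand in for whatever per-inode work and locking policy a real scanner needs.

STATIC int
xrep_foo_scan_all_inodes(
	struct xfs_scrub	*sc)
{
	struct xchk_iscan	iscan;
	struct xfs_inode	*ip;
	int			error;

	/* Retry busy inodes for up to 5000ms, napping 100ms between tries. */
	xchk_iscan_start(sc, 5000, 100, &iscan);

	while ((error = xchk_iscan_iter(&iscan, &ip)) == 1) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xrep_foo_scan_inode(sc, ip);	/* hypothetical helper */
		if (!error)
			xchk_iscan_mark_visited(&iscan, ip);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xchk_irele(sc, ip);
		if (error)
			break;
	}
	xchk_iscan_iter_finish(&iscan);

	/*
	 * The new index built during the scan is kept current by live update
	 * hooks that consult xchk_iscan_want_live_update() until teardown.
	 */
	xchk_iscan_teardown(&iscan);
	return error;
}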
5385f1a6 DW |
54 | /* |
55 | * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so | |
56 | * that the scan ignores that inode. | |
57 | */ | |
58 | STATIC void | |
59 | xchk_iscan_mask_skipino( | |
60 | struct xchk_iscan *iscan, | |
61 | struct xfs_perag *pag, | |
62 | struct xfs_inobt_rec_incore *rec, | |
63 | xfs_agino_t lastrecino) | |
64 | { | |
65 | struct xfs_scrub *sc = iscan->sc; | |
66 | struct xfs_mount *mp = sc->mp; | |
67 | xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino); | |
68 | xfs_agino_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino); | |
69 | ||
70 | if (pag->pag_agno != skip_agno) | |
71 | return; | |
72 | if (skip_agino < rec->ir_startino) | |
73 | return; | |
74 | if (skip_agino > lastrecino) | |
75 | return; | |
76 | ||
77 | rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1); | |
78 | } | |
79 | ||
8660c7b7 DW |
80 | /* |
81 | * Set *cursor to the next allocated inode after whatever it's set to now. | |
82 | * If there are no more inodes in this AG, cursor is set to NULLAGINO. | |
83 | */ | |
84 | STATIC int | |
85 | xchk_iscan_find_next( | |
86 | struct xchk_iscan *iscan, | |
87 | struct xfs_buf *agi_bp, | |
88 | struct xfs_perag *pag, | |
a7a686cb | 89 | xfs_inofree_t *allocmaskp, |
82334a79 DW |
90 | xfs_agino_t *cursor, |
91 | uint8_t *nr_inodesp) | |
8660c7b7 DW |
92 | { |
93 | struct xfs_scrub *sc = iscan->sc; | |
94 | struct xfs_inobt_rec_incore rec; | |
95 | struct xfs_btree_cur *cur; | |
96 | struct xfs_mount *mp = sc->mp; | |
97 | struct xfs_trans *tp = sc->tp; | |
98 | xfs_agnumber_t agno = pag->pag_agno; | |
99 | xfs_agino_t lastino = NULLAGINO; | |
100 | xfs_agino_t first, last; | |
101 | xfs_agino_t agino = *cursor; | |
102 | int has_rec; | |
103 | int error; | |
104 | ||
105 | /* If the cursor is beyond the end of this AG, move to the next one. */ | |
106 | xfs_agino_range(mp, agno, &first, &last); | |
107 | if (agino > last) { | |
108 | *cursor = NULLAGINO; | |
109 | return 0; | |
110 | } | |
111 | ||
112 | /* | |
113 | * Look up the inode chunk for the current cursor position. If there | |
114 | * is no chunk here, we want the next one. | |
115 | */ | |
14dd46cf | 116 | cur = xfs_inobt_init_cursor(pag, tp, agi_bp); |
8660c7b7 DW |
117 | error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec); |
118 | if (!error && !has_rec) | |
119 | error = xfs_btree_increment(cur, 0, &has_rec); | |
120 | for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) { | |
121 | xfs_inofree_t allocmask; | |
122 | ||
123 | /* | |
124 | * If we've run out of inobt records in this AG, move the | |
125 | * cursor on to the next AG and exit. The caller can try | |
126 | * again with the next AG. | |
127 | */ | |
128 | if (!has_rec) { | |
129 | *cursor = NULLAGINO; | |
130 | break; | |
131 | } | |
132 | ||
133 | error = xfs_inobt_get_rec(cur, &rec, &has_rec); | |
134 | if (error) | |
135 | break; | |
136 | if (!has_rec) { | |
137 | error = -EFSCORRUPTED; | |
138 | break; | |
139 | } | |
140 | ||
141 | /* Make sure that we always move forward. */ | |
142 | if (lastino != NULLAGINO && | |
143 | XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) { | |
144 | error = -EFSCORRUPTED; | |
145 | break; | |
146 | } | |
147 | lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1; | |
148 | ||
149 | /* | |
150 | * If this record only covers inodes that come before the | |
151 | * cursor, advance to the next record. | |
152 | */ | |
153 | if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) | |
154 | continue; | |
155 | ||
5385f1a6 DW |
156 | if (iscan->skip_ino) |
157 | xchk_iscan_mask_skipino(iscan, pag, &rec, lastino); | |
158 | ||
8660c7b7 DW |
159 | /* |
160 | * If the incoming lookup put us in the middle of an inobt | |
161 | * record, mark it and the previous inodes "free" so that the | |
162 | * search for allocated inodes will start at the cursor. | |
163 | * We don't care about ir_freecount here. | |
164 | */ | |
165 | if (agino >= rec.ir_startino) | |
166 | rec.ir_free |= xfs_inobt_maskn(0, | |
167 | agino + 1 - rec.ir_startino); | |
168 | ||
169 | /* | |
170 | * If there are allocated inodes in this chunk, find them | |
171 | * and update the scan cursor. | |
172 | */ | |
173 | allocmask = ~rec.ir_free; | |
174 | if (hweight64(allocmask) > 0) { | |
175 | int next = xfs_lowbit64(allocmask); | |
176 | ||
177 | ASSERT(next >= 0); | |
178 | *cursor = rec.ir_startino + next; | |
a7a686cb | 179 | *allocmaskp = allocmask >> next; |
82334a79 | 180 | *nr_inodesp = XFS_INODES_PER_CHUNK - next; |
8660c7b7 DW |
181 | break; |
182 | } | |
183 | } | |
184 | ||
185 | xfs_btree_del_cursor(cur, error); | |
186 | return error; | |
187 | } | |
188 | ||
189 | /* | |
190 | * Advance both the scan and the visited cursors. | |
191 | * | |
192 | * The inumber address space for a given filesystem is sparse, which means that | |
193 | * the scan cursor can jump a long ways in a single iter() call. There are no | |
194 | * inodes in these sparse areas, so we must move the visited cursor forward at | |
195 | * the same time so that the scan user can receive live updates for inodes that | |
196 | * may get created once we release the AGI buffer. | |
197 | */ | |
198 | static inline void | |
199 | xchk_iscan_move_cursor( | |
200 | struct xchk_iscan *iscan, | |
201 | xfs_agnumber_t agno, | |
202 | xfs_agino_t agino) | |
203 | { | |
204 | struct xfs_scrub *sc = iscan->sc; | |
205 | struct xfs_mount *mp = sc->mp; | |
c473a332 DW |
206 | xfs_ino_t cursor, visited; |
207 | ||
208 | BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO); | |
209 | ||
210 | /* | |
211 | * Special-case ino == 0 here so that we never set visited_ino to | |
212 | * NULLFSINO when wrapping around EOFS, for that will let through all | |
213 | * live updates. | |
214 | */ | |
215 | cursor = XFS_AGINO_TO_INO(mp, agno, agino); | |
216 | if (cursor == 0) | |
217 | visited = XFS_MAXINUMBER; | |
218 | else | |
219 | visited = cursor - 1; | |
8660c7b7 DW |
220 | |
221 | mutex_lock(&iscan->lock); | |
c473a332 DW |
222 | iscan->cursor_ino = cursor; |
223 | iscan->__visited_ino = visited; | |
8660c7b7 DW |
224 | trace_xchk_iscan_move_cursor(iscan); |
225 | mutex_unlock(&iscan->lock); | |
226 | } | |
227 | ||
228 | /* | |
229 | * Mark the inode scan finished: set the scan cursor to NULLFSINO and set the | |
230 | * visited cursor to NULLFSINO so that, from now on, all live updates are | |
231 | * passed to the scan's caller. | |
232 | */ | |
233 | static inline void | |
234 | xchk_iscan_finish( | |
235 | struct xchk_iscan *iscan) | |
236 | { | |
237 | mutex_lock(&iscan->lock); | |
238 | iscan->cursor_ino = NULLFSINO; | |
239 | ||
240 | /* All live updates will be applied from now on */ | |
241 | iscan->__visited_ino = NULLFSINO; | |
242 | ||
243 | mutex_unlock(&iscan->lock); | |
244 | } | |
245 | ||
a07b4557 DW |
246 | /* Mark an inode scan finished before we actually scan anything. */ |
247 | void | |
248 | xchk_iscan_finish_early( | |
249 | struct xchk_iscan *iscan) | |
250 | { | |
251 | ASSERT(iscan->cursor_ino == iscan->scan_start_ino); | |
252 | ASSERT(iscan->__visited_ino == iscan->scan_start_ino); | |
253 | ||
254 | xchk_iscan_finish(iscan); | |
255 | } | |
256 | ||
2afd5276 DW |
257 | /* |
258 | * Grab the AGI to advance the inode scan. Returns 0 if *agi_bpp is now set, | |
259 | * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed, | |
260 | * or the usual negative errno. | |
261 | */ | |
262 | STATIC int | |
263 | xchk_iscan_read_agi( | |
264 | struct xchk_iscan *iscan, | |
265 | struct xfs_perag *pag, | |
266 | struct xfs_buf **agi_bpp) | |
267 | { | |
268 | struct xfs_scrub *sc = iscan->sc; | |
269 | unsigned long relax; | |
270 | int ret; | |
271 | ||
272 | if (!xchk_iscan_agi_needs_trylock(iscan)) | |
273 | return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp); | |
274 | ||
275 | relax = msecs_to_jiffies(iscan->iget_retry_delay); | |
276 | do { | |
277 | ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK, | |
278 | agi_bpp); | |
279 | if (ret != -EAGAIN) | |
280 | return ret; | |
281 | if (!iscan->iget_timeout || | |
282 | time_is_before_jiffies(iscan->__iget_deadline)) | |
283 | return -EBUSY; | |
284 | ||
285 | trace_xchk_iscan_agi_retry_wait(iscan); | |
286 | } while (!schedule_timeout_killable(relax) && | |
287 | !xchk_iscan_aborted(iscan)); | |
288 | return -ECANCELED; | |
289 | } | |
290 | ||
8660c7b7 DW |
291 | /* |
292 | * Advance ino to the next inode that the inobt thinks is allocated, being | |
293 | * careful to jump to the next AG if we've reached the right end of this AG's | |
294 | * inode btree. Advancing ino effectively means that we've pushed the inode | |
295 | * scan forward, so set the iscan cursor to (ino - 1) so that our live update | |
296 | * predicates will track inode allocations in that part of the inode number | |
297 | * key space once we release the AGI buffer. | |
298 | * | |
299 | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, | |
300 | * -ECANCELED if the live scan aborted, or the usual negative errno. | |
301 | */ | |
302 | STATIC int | |
303 | xchk_iscan_advance( | |
304 | struct xchk_iscan *iscan, | |
305 | struct xfs_perag **pagp, | |
a7a686cb | 306 | struct xfs_buf **agi_bpp, |
82334a79 DW |
307 | xfs_inofree_t *allocmaskp, |
308 | uint8_t *nr_inodesp) | |
8660c7b7 DW |
309 | { |
310 | struct xfs_scrub *sc = iscan->sc; | |
311 | struct xfs_mount *mp = sc->mp; | |
312 | struct xfs_buf *agi_bp; | |
313 | struct xfs_perag *pag; | |
314 | xfs_agnumber_t agno; | |
315 | xfs_agino_t agino; | |
316 | int ret; | |
317 | ||
318 | ASSERT(iscan->cursor_ino >= iscan->__visited_ino); | |
319 | ||
320 | do { | |
321 | if (xchk_iscan_aborted(iscan)) | |
322 | return -ECANCELED; | |
323 | ||
324 | agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino); | |
325 | pag = xfs_perag_get(mp, agno); | |
326 | if (!pag) | |
327 | return -ECANCELED; | |
328 | ||
2afd5276 | 329 | ret = xchk_iscan_read_agi(iscan, pag, &agi_bp); |
8660c7b7 DW |
330 | if (ret) |
331 | goto out_pag; | |
332 | ||
333 | agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino); | |
a7a686cb | 334 | ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp, |
82334a79 | 335 | &agino, nr_inodesp); |
8660c7b7 DW |
336 | if (ret) |
337 | goto out_buf; | |
338 | ||
339 | if (agino != NULLAGINO) { | |
340 | /* | |
341 | * Found the next inode in this AG, so return it along | |
342 | * with the AGI buffer and the perag structure to | |
343 | * ensure it cannot go away. | |
344 | */ | |
345 | xchk_iscan_move_cursor(iscan, agno, agino); | |
346 | *agi_bpp = agi_bp; | |
347 | *pagp = pag; | |
348 | return 1; | |
349 | } | |
350 | ||
351 | /* | |
352 | * Did not find any more inodes in this AG, move on to the next | |
353 | * AG. | |
354 | */ | |
c473a332 DW |
355 | agno = (agno + 1) % mp->m_sb.sb_agcount; |
356 | xchk_iscan_move_cursor(iscan, agno, 0); | |
8660c7b7 DW |
357 | xfs_trans_brelse(sc->tp, agi_bp); |
358 | xfs_perag_put(pag); | |
359 | ||
360 | trace_xchk_iscan_advance_ag(iscan); | |
c473a332 | 361 | } while (iscan->cursor_ino != iscan->scan_start_ino); |
8660c7b7 DW |
362 | |
363 | xchk_iscan_finish(iscan); | |
364 | return 0; | |
365 | ||
366 | out_buf: | |
367 | xfs_trans_brelse(sc->tp, agi_bp); | |
368 | out_pag: | |
369 | xfs_perag_put(pag); | |
370 | return ret; | |
371 | } | |
372 | ||
373 | /* | |
374 | * Grabbing the inode failed, so we need to back up the scan and ask the caller | |
375 | * to try to _advance the scan again. Returns -EBUSY if we've run out of retry | |
376 | * opportunities, -ECANCELED if the process has a fatal signal pending, or | |
377 | * -EAGAIN if we should try again. | |
378 | */ | |
379 | STATIC int | |
380 | xchk_iscan_iget_retry( | |
381 | struct xchk_iscan *iscan, | |
382 | bool wait) | |
383 | { | |
384 | ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1); | |
385 | ||
386 | if (!iscan->iget_timeout || | |
387 | time_is_before_jiffies(iscan->__iget_deadline)) | |
388 | return -EBUSY; | |
389 | ||
390 | if (wait) { | |
391 | unsigned long relax; | |
392 | ||
393 | /* | |
394 | * Sleep for a period of time to let the rest of the system | |
395 | * catch up. If we return early, someone sent a kill signal to | |
396 | * the calling process. | |
397 | */ | |
398 | relax = msecs_to_jiffies(iscan->iget_retry_delay); | |
399 | trace_xchk_iscan_iget_retry_wait(iscan); | |
400 | ||
401 | if (schedule_timeout_killable(relax) || | |
402 | xchk_iscan_aborted(iscan)) | |
403 | return -ECANCELED; | |
404 | } | |
405 | ||
406 | iscan->cursor_ino--; | |
407 | return -EAGAIN; | |
408 | } | |
409 | ||
b27ce0da DW |
410 | /* |
411 | * For an inode scan, we hold the AGI and want to try to grab a batch of | |
412 | * inodes. Holding the AGI prevents inodegc from clearing freed inodes, | |
413 | * so we must use noretry here. For every inode after the first one in the | |
414 | * batch, we don't want to wait, so we use noretry there too. Finally, use | |
415 | * dontcache to avoid polluting the cache. | |
416 | */ | |
417 | #define ISCAN_IGET_FLAGS (XFS_IGET_NORETRY | XFS_IGET_DONTCACHE) | |
418 | ||
8660c7b7 DW |
419 | /* |
420 | * Grab an inode as part of an inode scan. While scanning this inode, the | |
421 | * caller must ensure that no other threads can modify the inode until a call | |
422 | * to xchk_iscan_mark_visited succeeds. | |
423 | * | |
a7a686cb DW |
424 | * Returns the number of incore inodes grabbed; -EAGAIN if the caller should |
425 | * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode; | |
426 | * -ECANCELED if there's a fatal signal pending; or some other negative errno. | |
8660c7b7 DW |
427 | */ |
428 | STATIC int | |
429 | xchk_iscan_iget( | |
430 | struct xchk_iscan *iscan, | |
431 | struct xfs_perag *pag, | |
432 | struct xfs_buf *agi_bp, | |
82334a79 DW |
433 | xfs_inofree_t allocmask, |
434 | uint8_t nr_inodes) | |
8660c7b7 DW |
435 | { |
436 | struct xfs_scrub *sc = iscan->sc; | |
437 | struct xfs_mount *mp = sc->mp; | |
a7a686cb DW |
438 | xfs_ino_t ino = iscan->cursor_ino; |
439 | unsigned int idx = 0; | |
82334a79 | 440 | unsigned int i; |
8660c7b7 DW |
441 | int error; |
442 | ||
a7a686cb DW |
443 | ASSERT(iscan->__inodes[0] == NULL); |
444 | ||
445 | /* Fill the first slot in the inode array. */ | |
b27ce0da | 446 | error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, |
a7a686cb | 447 | &iscan->__inodes[idx]); |
8660c7b7 DW |
448 | |
449 | trace_xchk_iscan_iget(iscan, error); | |
450 | ||
451 | if (error == -ENOENT || error == -EAGAIN) { | |
a7a686cb DW |
452 | xfs_trans_brelse(sc->tp, agi_bp); |
453 | xfs_perag_put(pag); | |
454 | ||
8660c7b7 DW |
455 | /* |
456 | * It's possible that this inode has lost all of its links but | |
457 | * hasn't yet been inactivated. If we don't have a transaction | |
458 | * or it's not writable, flush the inodegc workers and wait. | |
a07b4557 DW |
459 | * If we have a non-empty transaction, we must not block on |
460 | * inodegc, which allocates its own transactions. | |
8660c7b7 | 461 | */ |
a07b4557 DW |
462 | if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) |
463 | xfs_inodegc_push(mp); | |
464 | else | |
465 | xfs_inodegc_flush(mp); | |
8660c7b7 DW |
466 | return xchk_iscan_iget_retry(iscan, true); |
467 | } | |
468 | ||
469 | if (error == -EINVAL) { | |
a7a686cb DW |
470 | xfs_trans_brelse(sc->tp, agi_bp); |
471 | xfs_perag_put(pag); | |
472 | ||
8660c7b7 DW |
473 | /* |
474 | * We thought the inode was allocated, but the inode btree | |
475 | * lookup failed, which means that it was freed since the last | |
476 | * time we advanced the cursor. Back up and try again. This | |
477 | * should never happen since we still hold the AGI buffer from the | |
478 | * inobt check, but we need to be careful about infinite loops. | |
479 | */ | |
480 | return xchk_iscan_iget_retry(iscan, false); | |
481 | } | |
482 | ||
a7a686cb DW |
483 | if (error) { |
484 | xfs_trans_brelse(sc->tp, agi_bp); | |
485 | xfs_perag_put(pag); | |
486 | return error; | |
487 | } | |
488 | idx++; | |
489 | ino++; | |
490 | allocmask >>= 1; | |
491 | ||
492 | /* | |
493 | * Now that we've filled the first slot in __inodes, try to fill the | |
494 | * rest of the batch with consecutively ordered inodes to reduce the | |
82334a79 DW |
495 | * number of _iter calls. Make a bitmap of unallocated inodes from the | |
496 | * zeroes in the inuse bitmap; these inodes will not be scanned, but the | |
497 | * _want_live_update predicate will pass through all live updates. | |
498 | * | |
499 | * If we can't iget an allocated inode, stop and return what we have. | |
a7a686cb | 500 | */ |
82334a79 DW |
501 | mutex_lock(&iscan->lock); |
502 | iscan->__batch_ino = ino - 1; | |
503 | iscan->__skipped_inomask = 0; | |
504 | mutex_unlock(&iscan->lock); | |
505 | ||
506 | for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) { | |
507 | if (!(allocmask & 1)) { | |
508 | ASSERT(!(iscan->__skipped_inomask & (1ULL << i))); | |
509 | ||
510 | mutex_lock(&iscan->lock); | |
511 | iscan->cursor_ino = ino; | |
512 | iscan->__skipped_inomask |= (1ULL << i); | |
513 | mutex_unlock(&iscan->lock); | |
514 | continue; | |
515 | } | |
516 | ||
a7a686cb DW |
517 | ASSERT(iscan->__inodes[idx] == NULL); |
518 | ||
b27ce0da | 519 | error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0, |
a7a686cb DW |
520 | &iscan->__inodes[idx]); |
521 | if (error) | |
522 | break; | |
523 | ||
524 | mutex_lock(&iscan->lock); | |
525 | iscan->cursor_ino = ino; | |
526 | mutex_unlock(&iscan->lock); | |
82334a79 | 527 | idx++; |
a7a686cb DW |
528 | } |
529 | ||
82334a79 | 530 | trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx); |
a7a686cb DW |
531 | xfs_trans_brelse(sc->tp, agi_bp); |
532 | xfs_perag_put(pag); | |
533 | return idx; | |
8660c7b7 DW |
534 | } |
535 | ||
82334a79 DW |
536 | /* |
537 | * Advance the visit cursor to reflect skipped inodes beyond whatever we | |
538 | * scanned. | |
539 | */ | |
540 | STATIC void | |
541 | xchk_iscan_finish_batch( | |
542 | struct xchk_iscan *iscan) | |
543 | { | |
544 | xfs_ino_t highest_skipped; | |
545 | ||
546 | mutex_lock(&iscan->lock); | |
547 | ||
548 | if (iscan->__batch_ino != NULLFSINO) { | |
549 | highest_skipped = iscan->__batch_ino + | |
550 | xfs_highbit64(iscan->__skipped_inomask); | |
551 | iscan->__visited_ino = max(iscan->__visited_ino, | |
552 | highest_skipped); | |
553 | ||
554 | trace_xchk_iscan_skip(iscan); | |
555 | } | |
556 | ||
557 | iscan->__batch_ino = NULLFSINO; | |
558 | iscan->__skipped_inomask = 0; | |
559 | ||
560 | mutex_unlock(&iscan->lock); | |
561 | } | |
562 | ||
8660c7b7 | 563 | /* |
a7a686cb DW |
564 | * Advance the inode scan cursor to the next allocated inode and return up to |
565 | * 64 consecutive allocated inodes starting with the cursor position. | |
8660c7b7 | 566 | */ |
a7a686cb DW |
567 | STATIC int |
568 | xchk_iscan_iter_batch( | |
569 | struct xchk_iscan *iscan) | |
8660c7b7 DW |
570 | { |
571 | struct xfs_scrub *sc = iscan->sc; | |
572 | int ret; | |
573 | ||
82334a79 DW |
574 | xchk_iscan_finish_batch(iscan); |
575 | ||
8660c7b7 DW |
576 | if (iscan->iget_timeout) |
577 | iscan->__iget_deadline = jiffies + | |
578 | msecs_to_jiffies(iscan->iget_timeout); | |
579 | ||
580 | do { | |
581 | struct xfs_buf *agi_bp = NULL; | |
582 | struct xfs_perag *pag = NULL; | |
a7a686cb | 583 | xfs_inofree_t allocmask = 0; |
82334a79 | 584 | uint8_t nr_inodes = 0; |
8660c7b7 | 585 | |
82334a79 DW |
586 | ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask, |
587 | &nr_inodes); | |
8660c7b7 DW |
588 | if (ret != 1) |
589 | return ret; | |
590 | ||
591 | if (xchk_iscan_aborted(iscan)) { | |
592 | xfs_trans_brelse(sc->tp, agi_bp); | |
593 | xfs_perag_put(pag); | |
594 | ret = -ECANCELED; | |
595 | break; | |
596 | } | |
597 | ||
82334a79 | 598 | ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes); |
8660c7b7 DW |
599 | } while (ret == -EAGAIN); |
600 | ||
8660c7b7 DW |
601 | return ret; |
602 | } | |
603 | ||
a7a686cb DW |
604 | /* |
605 | * Advance the inode scan cursor to the next allocated inode and return the | |
606 | * incore inode structure associated with it. | |
607 | * | |
608 | * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes, | |
609 | * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be | |
610 | * grabbed, or the usual negative errno. | |
611 | * | |
612 | * If the function returns -EBUSY and the caller can handle skipping an inode, | |
613 | * it may call this function again to continue the scan with the next allocated | |
614 | * inode. | |
615 | */ | |
616 | int | |
617 | xchk_iscan_iter( | |
618 | struct xchk_iscan *iscan, | |
619 | struct xfs_inode **ipp) | |
620 | { | |
621 | unsigned int i; | |
622 | int error; | |
623 | ||
624 | /* Find a cached inode, or go get another batch. */ | |
625 | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { | |
626 | if (iscan->__inodes[i]) | |
627 | goto foundit; | |
628 | } | |
629 | ||
630 | error = xchk_iscan_iter_batch(iscan); | |
631 | if (error <= 0) | |
632 | return error; | |
633 | ||
634 | ASSERT(iscan->__inodes[0] != NULL); | |
635 | i = 0; | |
636 | ||
637 | foundit: | |
638 | /* Give the caller our reference. */ | |
639 | *ipp = iscan->__inodes[i]; | |
640 | iscan->__inodes[i] = NULL; | |
641 | return 1; | |
642 | } | |
643 | ||
644 | /* Clean up an xchk_iscan_iter call by dropping any inodes that we still hold. */ | |
645 | void | |
646 | xchk_iscan_iter_finish( | |
647 | struct xchk_iscan *iscan) | |
648 | { | |
649 | struct xfs_scrub *sc = iscan->sc; | |
650 | unsigned int i; | |
651 | ||
652 | for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { | |
653 | if (iscan->__inodes[i]) { | |
654 | xchk_irele(sc, iscan->__inodes[i]); | |
655 | iscan->__inodes[i] = NULL; | |
656 | } | |
657 | } | |
658 | } | |
8660c7b7 DW |
659 | |
660 | /* Mark this inode scan finished and release resources. */ | |
661 | void | |
662 | xchk_iscan_teardown( | |
663 | struct xchk_iscan *iscan) | |
664 | { | |
a7a686cb | 665 | xchk_iscan_iter_finish(iscan); |
8660c7b7 DW |
666 | xchk_iscan_finish(iscan); |
667 | mutex_destroy(&iscan->lock); | |
668 | } | |
669 | ||
c473a332 DW |
670 | /* Pick an AG from which to start a scan. */ |
671 | static inline xfs_ino_t | |
672 | xchk_iscan_rotor( | |
673 | struct xfs_mount *mp) | |
674 | { | |
675 | static atomic_t agi_rotor; | |
676 | unsigned int r = atomic_inc_return(&agi_rotor) - 1; | |
677 | ||
678 | /* | |
679 | * Rotoring *backwards* through the AGs, so we add one here before | |
680 | * subtracting from the agcount to arrive at an AG number. | |
681 | */ | |
682 | r = (r % mp->m_sb.sb_agcount) + 1; | |
683 | ||
684 | return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0); | |
685 | } | |
686 | ||
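For example, with sb_agcount == 4 the successive rotor values become r = 1, 2, 3, 4, 1, ..., so consecutive scans start at the first inode of AG 3, then AG 2, AG 1, AG 0, and then wrap back to AG 3.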
8660c7b7 DW |
687 | /* |
688 | * Set ourselves up to start an inode scan. If the @iget_timeout and | |
689 | * @iget_retry_delay parameters are set, the scan will try to iget each inode | |
690 | * for @iget_timeout milliseconds. If an iget call indicates that the inode is | |
691 | * waiting to be inactivated, the CPU will relax for @iget_retry_delay | |
692 | * milliseconds after pushing the inactivation workers. | |
693 | */ | |
694 | void | |
695 | xchk_iscan_start( | |
696 | struct xfs_scrub *sc, | |
697 | unsigned int iget_timeout, | |
698 | unsigned int iget_retry_delay, | |
699 | struct xchk_iscan *iscan) | |
700 | { | |
c473a332 DW |
701 | xfs_ino_t start_ino; |
702 | ||
703 | start_ino = xchk_iscan_rotor(sc->mp); | |
704 | ||
82334a79 DW |
705 | iscan->__batch_ino = NULLFSINO; |
706 | iscan->__skipped_inomask = 0; | |
707 | ||
8660c7b7 DW |
708 | iscan->sc = sc; |
709 | clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate); | |
710 | iscan->iget_timeout = iget_timeout; | |
711 | iscan->iget_retry_delay = iget_retry_delay; | |
c473a332 DW |
712 | iscan->__visited_ino = start_ino; |
713 | iscan->cursor_ino = start_ino; | |
714 | iscan->scan_start_ino = start_ino; | |
8660c7b7 | 715 | mutex_init(&iscan->lock); |
a7a686cb | 716 | memset(iscan->__inodes, 0, sizeof(iscan->__inodes)); |
8660c7b7 | 717 | |
c473a332 | 718 | trace_xchk_iscan_start(iscan, start_ino); |
8660c7b7 DW |
719 | } |
720 | ||
721 | /* | |
722 | * Mark this inode as having been visited. Callers must hold a sufficiently | |
723 | * exclusive lock on the inode to prevent concurrent modifications. | |
724 | */ | |
725 | void | |
726 | xchk_iscan_mark_visited( | |
727 | struct xchk_iscan *iscan, | |
728 | struct xfs_inode *ip) | |
729 | { | |
730 | mutex_lock(&iscan->lock); | |
731 | iscan->__visited_ino = ip->i_ino; | |
732 | trace_xchk_iscan_visit(iscan); | |
733 | mutex_unlock(&iscan->lock); | |
734 | } | |
735 | ||
82334a79 DW |
736 | /* |
737 | * Did we skip this inode because it wasn't allocated when we loaded the batch? | |
738 | * If so, it is newly allocated and will not be scanned. All live updates to | |
739 | * this inode must be passed to the caller to maintain scan correctness. | |
740 | */ | |
741 | static inline bool | |
742 | xchk_iscan_skipped( | |
743 | const struct xchk_iscan *iscan, | |
744 | xfs_ino_t ino) | |
745 | { | |
746 | if (iscan->__batch_ino == NULLFSINO) | |
747 | return false; | |
748 | if (ino < iscan->__batch_ino) | |
749 | return false; | |
750 | if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK) | |
751 | return false; | |
752 | ||
753 | return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino)); | |
754 | } | |
755 | ||
8660c7b7 DW |
756 | /* |
757 | * Do we need a live update for this inode? This is true if the scanner thread | |
758 | * has visited this inode and the scan hasn't been aborted due to errors. | |
759 | * Callers must hold a sufficiently exclusive lock on the inode to prevent | |
760 | * scanners from reading any inode metadata. | |
761 | */ | |
762 | bool | |
763 | xchk_iscan_want_live_update( | |
764 | struct xchk_iscan *iscan, | |
765 | xfs_ino_t ino) | |
766 | { | |
c473a332 | 767 | bool ret = false; |
8660c7b7 DW |
768 | |
769 | if (xchk_iscan_aborted(iscan)) | |
770 | return false; | |
771 | ||
772 | mutex_lock(&iscan->lock); | |
c473a332 | 773 | |
8660c7b7 | 774 | trace_xchk_iscan_want_live_update(iscan, ino); |
8660c7b7 | 775 | |
c473a332 DW |
776 | /* Scan is finished, caller should receive all updates. */ |
777 | if (iscan->__visited_ino == NULLFSINO) { | |
778 | ret = true; | |
779 | goto unlock; | |
780 | } | |
781 | ||
a7a686cb DW |
782 | /* |
783 | * No inodes have been visited yet, so the visited cursor points at the | |
784 | * start of the scan range. The caller should not receive any updates. | |
785 | */ | |
786 | if (iscan->scan_start_ino == iscan->__visited_ino) { | |
787 | ret = false; | |
788 | goto unlock; | |
789 | } | |
790 | ||
82334a79 DW |
791 | /* |
792 | * This inode was not allocated at the time of the iscan batch. | |
793 | * The caller should receive all updates. | |
794 | */ | |
795 | if (xchk_iscan_skipped(iscan, ino)) { | |
796 | ret = true; | |
797 | goto unlock; | |
798 | } | |
799 | ||
c473a332 DW |
800 | /* |
801 | * The visited cursor hasn't yet wrapped around the end of the FS. If | |
802 | * @ino is inside the starred range, the caller should receive updates: | |
803 | * | |
804 | * 0 ------------ S ************ V ------------ EOFS | |
805 | */ | |
806 | if (iscan->scan_start_ino <= iscan->__visited_ino) { | |
807 | if (ino >= iscan->scan_start_ino && | |
808 | ino <= iscan->__visited_ino) | |
809 | ret = true; | |
810 | ||
811 | goto unlock; | |
812 | } | |
813 | ||
814 | /* | |
815 | * The visited cursor wrapped around the end of the FS. If @ino is | |
816 | * inside the starred range, the caller should receive updates: | |
817 | * | |
818 | * 0 ************ V ------------ S ************ EOFS | |
819 | */ | |
820 | if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino) | |
821 | ret = true; | |
822 | ||
823 | unlock: | |
824 | mutex_unlock(&iscan->lock); | |
8660c7b7 DW |
825 | return ret; |
826 | } |
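To round out the picture from the hook side, a live update hook built on this predicate might look like the following sketch (not part of iscan.c). The xfoo structure, its shadow_lock, the xfoo_delta payload, and xfoo_apply_update() are illustrative names only; the xchk_iscan_want_live_update() call is the real entry point defined above.

STATIC void
xfoo_live_update(
	struct xfoo		*xfoo,
	xfs_ino_t		ino,
	const struct xfoo_delta	*delta)
{
	/*
	 * Ignore updates for inodes that the scanner has not yet visited;
	 * the scanner itself will observe their current state later.
	 */
	if (!xchk_iscan_want_live_update(&xfoo->iscan, ino))
		return;

	mutex_lock(&xfoo->shadow_lock);
	xfoo_apply_update(xfoo, ino, delta);	/* hypothetical index update */
	mutex_unlock(&xfoo->shadow_lock);
}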