Commit | Line | Data |
---|---|---|
0b61f8a4 | 1 | // SPDX-License-Identifier: GPL-2.0+ |
0f28b257 DW |
2 | /* |
3 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
0f28b257 | 4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> |
0f28b257 DW |
5 | */ |
6 | #include "xfs.h" | |
7 | #include "xfs_fs.h" | |
8 | #include "xfs_shared.h" | |
9 | #include "xfs_format.h" | |
10 | #include "xfs_trans_resv.h" | |
11 | #include "xfs_mount.h" | |
12 | #include "xfs_defer.h" | |
13 | #include "xfs_btree.h" | |
14 | #include "xfs_bit.h" | |
15 | #include "xfs_log_format.h" | |
16 | #include "xfs_trans.h" | |
17 | #include "xfs_sb.h" | |
18 | #include "xfs_inode.h" | |
19 | #include "xfs_icache.h" | |
20 | #include "xfs_dir2.h" | |
21 | #include "xfs_dir2_priv.h" | |
22 | #include "xfs_ialloc.h" | |
23 | #include "scrub/xfs_scrub.h" | |
24 | #include "scrub/scrub.h" | |
25 | #include "scrub/common.h" | |
26 | #include "scrub/trace.h" | |
27 | ||
/*
 * Setup hook for the parent pointer scrubber.
 *
 * All of the work is delegated to xfs_scrub_setup_inode_contents(), which is
 * handed the scrub context, the inode, and a literal 0 as its third argument
 * (NOTE(review): presumably a block reservation count -- confirm against the
 * helper's declaration).  Whatever the helper returns is passed straight
 * back to the caller.
 */
int
xfs_scrub_setup_parent(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	int				error;

	error = xfs_scrub_setup_inode_contents(sc, ip, 0);
	return error;
}
36 | ||
37 | /* Parent pointers */ | |
38 | ||
39 | /* Look for an entry in a parent pointing to this inode. */ | |
40 | ||
41 | struct xfs_scrub_parent_ctx { | |
42 | struct dir_context dc; | |
43 | xfs_ino_t ino; | |
44 | xfs_nlink_t nlink; | |
45 | }; | |
46 | ||
47 | /* Look for a single entry in a directory pointing to an inode. */ | |
48 | STATIC int | |
49 | xfs_scrub_parent_actor( | |
50 | struct dir_context *dc, | |
51 | const char *name, | |
52 | int namelen, | |
53 | loff_t pos, | |
54 | u64 ino, | |
55 | unsigned type) | |
56 | { | |
57 | struct xfs_scrub_parent_ctx *spc; | |
58 | ||
59 | spc = container_of(dc, struct xfs_scrub_parent_ctx, dc); | |
60 | if (spc->ino == ino) | |
61 | spc->nlink++; | |
62 | return 0; | |
63 | } | |
64 | ||
65 | /* Count the number of dentries in the parent dir that point to this inode. */ | |
66 | STATIC int | |
67 | xfs_scrub_parent_count_parent_dentries( | |
68 | struct xfs_scrub_context *sc, | |
69 | struct xfs_inode *parent, | |
70 | xfs_nlink_t *nlink) | |
71 | { | |
72 | struct xfs_scrub_parent_ctx spc = { | |
73 | .dc.actor = xfs_scrub_parent_actor, | |
74 | .dc.pos = 0, | |
75 | .ino = sc->ip->i_ino, | |
76 | .nlink = 0, | |
77 | }; | |
78 | size_t bufsize; | |
79 | loff_t oldpos; | |
80 | uint lock_mode; | |
81 | int error = 0; | |
82 | ||
83 | /* | |
84 | * If there are any blocks, read-ahead block 0 as we're almost | |
85 | * certain to have the next operation be a read there. This is | |
86 | * how we guarantee that the parent's extent map has been loaded, | |
87 | * if there is one. | |
88 | */ | |
89 | lock_mode = xfs_ilock_data_map_shared(parent); | |
90 | if (parent->i_d.di_nextents > 0) | |
91 | error = xfs_dir3_data_readahead(parent, 0, -1); | |
92 | xfs_iunlock(parent, lock_mode); | |
93 | if (error) | |
94 | return error; | |
95 | ||
96 | /* | |
97 | * Iterate the parent dir to confirm that there is | |
98 | * exactly one entry pointing back to the inode being | |
99 | * scanned. | |
100 | */ | |
101 | bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, | |
102 | parent->i_d.di_size); | |
103 | oldpos = 0; | |
104 | while (true) { | |
105 | error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); | |
106 | if (error) | |
107 | goto out; | |
108 | if (oldpos == spc.dc.pos) | |
109 | break; | |
110 | oldpos = spc.dc.pos; | |
111 | } | |
112 | *nlink = spc.nlink; | |
113 | out: | |
114 | return error; | |
115 | } | |
116 | ||
117 | /* | |
118 | * Given the inode number of the alleged parent of the inode being | |
119 | * scrubbed, try to validate that the parent has exactly one directory | |
120 | * entry pointing back to the inode being scrubbed. | |
121 | */ | |
122 | STATIC int | |
123 | xfs_scrub_parent_validate( | |
124 | struct xfs_scrub_context *sc, | |
125 | xfs_ino_t dnum, | |
126 | bool *try_again) | |
127 | { | |
128 | struct xfs_mount *mp = sc->mp; | |
129 | struct xfs_inode *dp = NULL; | |
130 | xfs_nlink_t expected_nlink; | |
131 | xfs_nlink_t nlink; | |
72f76f73 | 132 | int error = 0; |
0f28b257 DW |
133 | |
134 | *try_again = false; | |
135 | ||
8bc763c2 DW |
136 | if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) |
137 | goto out; | |
138 | ||
0f28b257 DW |
139 | /* '..' must not point to ourselves. */ |
140 | if (sc->ip->i_ino == dnum) { | |
141 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
142 | goto out; | |
143 | } | |
144 | ||
145 | /* | |
146 | * If we're an unlinked directory, the parent /won't/ have a link | |
147 | * to us. Otherwise, it should have one link. | |
148 | */ | |
149 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
150 | ||
151 | /* | |
152 | * Grab this parent inode. We release the inode before we | |
153 | * cancel the scrub transaction. Since we're don't know a | |
154 | * priori that releasing the inode won't trigger eofblocks | |
155 | * cleanup (which allocates what would be a nested transaction) | |
156 | * if the parent pointer erroneously points to a file, we | |
157 | * can't use DONTCACHE here because DONTCACHE inodes can trigger | |
158 | * immediate inactive cleanup of the inode. | |
5927268f DW |
159 | * |
160 | * If _iget returns -EINVAL then the parent inode number is garbage | |
161 | * and the directory is corrupt. If the _iget returns -EFSCORRUPTED | |
162 | * or -EFSBADCRC then the parent is corrupt which is a cross | |
163 | * referencing error. Any other error is an operational error. | |
0f28b257 | 164 | */ |
5927268f DW |
165 | error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); |
166 | if (error == -EINVAL) { | |
167 | error = -EFSCORRUPTED; | |
168 | xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); | |
169 | goto out; | |
170 | } | |
9a7e2695 | 171 | if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 | 172 | goto out; |
46c59736 | 173 | if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { |
0f28b257 DW |
174 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
175 | goto out_rele; | |
176 | } | |
177 | ||
178 | /* | |
179 | * We prefer to keep the inode locked while we lock and search | |
180 | * its alleged parent for a forward reference. If we can grab | |
181 | * the iolock, validate the pointers and we're done. We must | |
182 | * use nowait here to avoid an ABBA deadlock on the parent and | |
183 | * the child inodes. | |
184 | */ | |
185 | if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { | |
186 | error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); | |
9a7e2695 | 187 | if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, |
0f28b257 DW |
188 | &error)) |
189 | goto out_unlock; | |
190 | if (nlink != expected_nlink) | |
191 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
192 | goto out_unlock; | |
193 | } | |
194 | ||
195 | /* | |
196 | * The game changes if we get here. We failed to lock the parent, | |
197 | * so we're going to try to verify both pointers while only holding | |
198 | * one lock so as to avoid deadlocking with something that's actually | |
199 | * trying to traverse down the directory tree. | |
200 | */ | |
201 | xfs_iunlock(sc->ip, sc->ilock_flags); | |
202 | sc->ilock_flags = 0; | |
ddd10c2f DW |
203 | error = xfs_scrub_ilock_inverted(dp, XFS_IOLOCK_SHARED); |
204 | if (error) | |
205 | goto out_rele; | |
0f28b257 DW |
206 | |
207 | /* Go looking for our dentry. */ | |
208 | error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); | |
9a7e2695 | 209 | if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 DW |
210 | goto out_unlock; |
211 | ||
212 | /* Drop the parent lock, relock this inode. */ | |
213 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
ddd10c2f DW |
214 | error = xfs_scrub_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL); |
215 | if (error) | |
216 | goto out_rele; | |
0f28b257 | 217 | sc->ilock_flags = XFS_IOLOCK_EXCL; |
0f28b257 DW |
218 | |
219 | /* | |
220 | * If we're an unlinked directory, the parent /won't/ have a link | |
221 | * to us. Otherwise, it should have one link. We have to re-set | |
222 | * it here because we dropped the lock on sc->ip. | |
223 | */ | |
224 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
225 | ||
226 | /* Look up '..' to see if the inode changed. */ | |
227 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
228 | if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) | |
229 | goto out_rele; | |
230 | ||
231 | /* Drat, parent changed. Try again! */ | |
232 | if (dnum != dp->i_ino) { | |
233 | iput(VFS_I(dp)); | |
234 | *try_again = true; | |
235 | return 0; | |
236 | } | |
237 | iput(VFS_I(dp)); | |
238 | ||
239 | /* | |
240 | * '..' didn't change, so check that there was only one entry | |
241 | * for us in the parent. | |
242 | */ | |
243 | if (nlink != expected_nlink) | |
244 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
245 | return error; | |
246 | ||
247 | out_unlock: | |
248 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
249 | out_rele: | |
250 | iput(VFS_I(dp)); | |
251 | out: | |
252 | return error; | |
253 | } | |
254 | ||
255 | /* Scrub a parent pointer. */ | |
256 | int | |
257 | xfs_scrub_parent( | |
258 | struct xfs_scrub_context *sc) | |
259 | { | |
260 | struct xfs_mount *mp = sc->mp; | |
261 | xfs_ino_t dnum; | |
262 | bool try_again; | |
263 | int tries = 0; | |
72f76f73 | 264 | int error = 0; |
0f28b257 DW |
265 | |
266 | /* | |
267 | * If we're a directory, check that the '..' link points up to | |
268 | * a directory that has one entry pointing to us. | |
269 | */ | |
270 | if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) | |
271 | return -ENOENT; | |
272 | ||
273 | /* We're not a special inode, are we? */ | |
274 | if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { | |
275 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
276 | goto out; | |
277 | } | |
278 | ||
279 | /* | |
280 | * The VFS grabs a read or write lock via i_rwsem before it reads | |
281 | * or writes to a directory. If we've gotten this far we've | |
282 | * already obtained IOLOCK_EXCL, which (since 4.10) is the same as | |
283 | * getting a write lock on i_rwsem. Therefore, it is safe for us | |
284 | * to drop the ILOCK here in order to do directory lookups. | |
285 | */ | |
286 | sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
287 | xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
288 | ||
289 | /* Look up '..' */ | |
290 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
291 | if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) | |
292 | goto out; | |
293 | if (!xfs_verify_dir_ino(mp, dnum)) { | |
294 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
295 | goto out; | |
296 | } | |
297 | ||
298 | /* Is this the root dir? Then '..' must point to itself. */ | |
299 | if (sc->ip == mp->m_rootip) { | |
300 | if (sc->ip->i_ino != mp->m_sb.sb_rootino || | |
301 | sc->ip->i_ino != dnum) | |
302 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
303 | goto out; | |
304 | } | |
305 | ||
306 | do { | |
307 | error = xfs_scrub_parent_validate(sc, dnum, &try_again); | |
308 | if (error) | |
309 | goto out; | |
310 | } while (try_again && ++tries < 20); | |
311 | ||
312 | /* | |
313 | * We gave it our best shot but failed, so mark this scrub | |
314 | * incomplete. Userspace can decide if it wants to try again. | |
315 | */ | |
316 | if (try_again && tries == 20) | |
317 | xfs_scrub_set_incomplete(sc); | |
318 | out: | |
ddd10c2f DW |
319 | /* |
320 | * If we failed to lock the parent inode even after a retry, just mark | |
321 | * this scrub incomplete and return. | |
322 | */ | |
323 | if (sc->try_harder && error == -EDEADLOCK) { | |
324 | error = 0; | |
325 | xfs_scrub_set_incomplete(sc); | |
326 | } | |
0f28b257 DW |
327 | return error; |
328 | } |