Commit | Line | Data |
---|---|---|
0f28b257 DW |
1 | /* |
2 | * Copyright (C) 2017 Oracle. All Rights Reserved. | |
3 | * | |
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it would be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write the Free Software Foundation, | |
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #include "xfs.h" | |
21 | #include "xfs_fs.h" | |
22 | #include "xfs_shared.h" | |
23 | #include "xfs_format.h" | |
24 | #include "xfs_trans_resv.h" | |
25 | #include "xfs_mount.h" | |
26 | #include "xfs_defer.h" | |
27 | #include "xfs_btree.h" | |
28 | #include "xfs_bit.h" | |
29 | #include "xfs_log_format.h" | |
30 | #include "xfs_trans.h" | |
31 | #include "xfs_sb.h" | |
32 | #include "xfs_inode.h" | |
33 | #include "xfs_icache.h" | |
34 | #include "xfs_dir2.h" | |
35 | #include "xfs_dir2_priv.h" | |
36 | #include "xfs_ialloc.h" | |
37 | #include "scrub/xfs_scrub.h" | |
38 | #include "scrub/scrub.h" | |
39 | #include "scrub/common.h" | |
40 | #include "scrub/trace.h" | |
41 | ||
/*
 * Set up the scrub context for a parent pointer check.
 *
 * This simply hands off to xfs_scrub_setup_inode_contents() with a
 * third argument of zero (presumably "no extra block reservation" --
 * TODO confirm against that helper) and passes its return value back
 * to the caller unchanged.
 */
int
xfs_scrub_setup_parent(
	struct xfs_scrub_context	*sc,
	struct xfs_inode		*ip)
{
	int				error;

	error = xfs_scrub_setup_inode_contents(sc, ip, 0);
	return error;
}
50 | ||
51 | /* Parent pointers */ | |
52 | ||
53 | /* Look for an entry in a parent pointing to this inode. */ | |
54 | ||
55 | struct xfs_scrub_parent_ctx { | |
56 | struct dir_context dc; | |
57 | xfs_ino_t ino; | |
58 | xfs_nlink_t nlink; | |
59 | }; | |
60 | ||
61 | /* Look for a single entry in a directory pointing to an inode. */ | |
62 | STATIC int | |
63 | xfs_scrub_parent_actor( | |
64 | struct dir_context *dc, | |
65 | const char *name, | |
66 | int namelen, | |
67 | loff_t pos, | |
68 | u64 ino, | |
69 | unsigned type) | |
70 | { | |
71 | struct xfs_scrub_parent_ctx *spc; | |
72 | ||
73 | spc = container_of(dc, struct xfs_scrub_parent_ctx, dc); | |
74 | if (spc->ino == ino) | |
75 | spc->nlink++; | |
76 | return 0; | |
77 | } | |
78 | ||
79 | /* Count the number of dentries in the parent dir that point to this inode. */ | |
80 | STATIC int | |
81 | xfs_scrub_parent_count_parent_dentries( | |
82 | struct xfs_scrub_context *sc, | |
83 | struct xfs_inode *parent, | |
84 | xfs_nlink_t *nlink) | |
85 | { | |
86 | struct xfs_scrub_parent_ctx spc = { | |
87 | .dc.actor = xfs_scrub_parent_actor, | |
88 | .dc.pos = 0, | |
89 | .ino = sc->ip->i_ino, | |
90 | .nlink = 0, | |
91 | }; | |
92 | size_t bufsize; | |
93 | loff_t oldpos; | |
94 | uint lock_mode; | |
95 | int error = 0; | |
96 | ||
97 | /* | |
98 | * If there are any blocks, read-ahead block 0 as we're almost | |
99 | * certain to have the next operation be a read there. This is | |
100 | * how we guarantee that the parent's extent map has been loaded, | |
101 | * if there is one. | |
102 | */ | |
103 | lock_mode = xfs_ilock_data_map_shared(parent); | |
104 | if (parent->i_d.di_nextents > 0) | |
105 | error = xfs_dir3_data_readahead(parent, 0, -1); | |
106 | xfs_iunlock(parent, lock_mode); | |
107 | if (error) | |
108 | return error; | |
109 | ||
110 | /* | |
111 | * Iterate the parent dir to confirm that there is | |
112 | * exactly one entry pointing back to the inode being | |
113 | * scanned. | |
114 | */ | |
115 | bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, | |
116 | parent->i_d.di_size); | |
117 | oldpos = 0; | |
118 | while (true) { | |
119 | error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize); | |
120 | if (error) | |
121 | goto out; | |
122 | if (oldpos == spc.dc.pos) | |
123 | break; | |
124 | oldpos = spc.dc.pos; | |
125 | } | |
126 | *nlink = spc.nlink; | |
127 | out: | |
128 | return error; | |
129 | } | |
130 | ||
131 | /* | |
132 | * Given the inode number of the alleged parent of the inode being | |
133 | * scrubbed, try to validate that the parent has exactly one directory | |
134 | * entry pointing back to the inode being scrubbed. | |
135 | */ | |
136 | STATIC int | |
137 | xfs_scrub_parent_validate( | |
138 | struct xfs_scrub_context *sc, | |
139 | xfs_ino_t dnum, | |
140 | bool *try_again) | |
141 | { | |
142 | struct xfs_mount *mp = sc->mp; | |
143 | struct xfs_inode *dp = NULL; | |
144 | xfs_nlink_t expected_nlink; | |
145 | xfs_nlink_t nlink; | |
72f76f73 | 146 | int error = 0; |
0f28b257 DW |
147 | |
148 | *try_again = false; | |
149 | ||
150 | /* '..' must not point to ourselves. */ | |
151 | if (sc->ip->i_ino == dnum) { | |
152 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
153 | goto out; | |
154 | } | |
155 | ||
156 | /* | |
157 | * If we're an unlinked directory, the parent /won't/ have a link | |
158 | * to us. Otherwise, it should have one link. | |
159 | */ | |
160 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
161 | ||
162 | /* | |
163 | * Grab this parent inode. We release the inode before we | |
164 | * cancel the scrub transaction. Since we're don't know a | |
165 | * priori that releasing the inode won't trigger eofblocks | |
166 | * cleanup (which allocates what would be a nested transaction) | |
167 | * if the parent pointer erroneously points to a file, we | |
168 | * can't use DONTCACHE here because DONTCACHE inodes can trigger | |
169 | * immediate inactive cleanup of the inode. | |
5927268f DW |
170 | * |
171 | * If _iget returns -EINVAL then the parent inode number is garbage | |
172 | * and the directory is corrupt. If the _iget returns -EFSCORRUPTED | |
173 | * or -EFSBADCRC then the parent is corrupt which is a cross | |
174 | * referencing error. Any other error is an operational error. | |
0f28b257 | 175 | */ |
5927268f DW |
176 | error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); |
177 | if (error == -EINVAL) { | |
178 | error = -EFSCORRUPTED; | |
179 | xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); | |
180 | goto out; | |
181 | } | |
9a7e2695 | 182 | if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 | 183 | goto out; |
46c59736 | 184 | if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) { |
0f28b257 DW |
185 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); |
186 | goto out_rele; | |
187 | } | |
188 | ||
189 | /* | |
190 | * We prefer to keep the inode locked while we lock and search | |
191 | * its alleged parent for a forward reference. If we can grab | |
192 | * the iolock, validate the pointers and we're done. We must | |
193 | * use nowait here to avoid an ABBA deadlock on the parent and | |
194 | * the child inodes. | |
195 | */ | |
196 | if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) { | |
197 | error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); | |
9a7e2695 | 198 | if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, |
0f28b257 DW |
199 | &error)) |
200 | goto out_unlock; | |
201 | if (nlink != expected_nlink) | |
202 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
203 | goto out_unlock; | |
204 | } | |
205 | ||
206 | /* | |
207 | * The game changes if we get here. We failed to lock the parent, | |
208 | * so we're going to try to verify both pointers while only holding | |
209 | * one lock so as to avoid deadlocking with something that's actually | |
210 | * trying to traverse down the directory tree. | |
211 | */ | |
212 | xfs_iunlock(sc->ip, sc->ilock_flags); | |
213 | sc->ilock_flags = 0; | |
214 | xfs_ilock(dp, XFS_IOLOCK_SHARED); | |
215 | ||
216 | /* Go looking for our dentry. */ | |
217 | error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink); | |
9a7e2695 | 218 | if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) |
0f28b257 DW |
219 | goto out_unlock; |
220 | ||
221 | /* Drop the parent lock, relock this inode. */ | |
222 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
223 | sc->ilock_flags = XFS_IOLOCK_EXCL; | |
224 | xfs_ilock(sc->ip, sc->ilock_flags); | |
225 | ||
226 | /* | |
227 | * If we're an unlinked directory, the parent /won't/ have a link | |
228 | * to us. Otherwise, it should have one link. We have to re-set | |
229 | * it here because we dropped the lock on sc->ip. | |
230 | */ | |
231 | expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; | |
232 | ||
233 | /* Look up '..' to see if the inode changed. */ | |
234 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
235 | if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) | |
236 | goto out_rele; | |
237 | ||
238 | /* Drat, parent changed. Try again! */ | |
239 | if (dnum != dp->i_ino) { | |
240 | iput(VFS_I(dp)); | |
241 | *try_again = true; | |
242 | return 0; | |
243 | } | |
244 | iput(VFS_I(dp)); | |
245 | ||
246 | /* | |
247 | * '..' didn't change, so check that there was only one entry | |
248 | * for us in the parent. | |
249 | */ | |
250 | if (nlink != expected_nlink) | |
251 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
252 | return error; | |
253 | ||
254 | out_unlock: | |
255 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | |
256 | out_rele: | |
257 | iput(VFS_I(dp)); | |
258 | out: | |
259 | return error; | |
260 | } | |
261 | ||
262 | /* Scrub a parent pointer. */ | |
263 | int | |
264 | xfs_scrub_parent( | |
265 | struct xfs_scrub_context *sc) | |
266 | { | |
267 | struct xfs_mount *mp = sc->mp; | |
268 | xfs_ino_t dnum; | |
269 | bool try_again; | |
270 | int tries = 0; | |
72f76f73 | 271 | int error = 0; |
0f28b257 DW |
272 | |
273 | /* | |
274 | * If we're a directory, check that the '..' link points up to | |
275 | * a directory that has one entry pointing to us. | |
276 | */ | |
277 | if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) | |
278 | return -ENOENT; | |
279 | ||
280 | /* We're not a special inode, are we? */ | |
281 | if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) { | |
282 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
283 | goto out; | |
284 | } | |
285 | ||
286 | /* | |
287 | * The VFS grabs a read or write lock via i_rwsem before it reads | |
288 | * or writes to a directory. If we've gotten this far we've | |
289 | * already obtained IOLOCK_EXCL, which (since 4.10) is the same as | |
290 | * getting a write lock on i_rwsem. Therefore, it is safe for us | |
291 | * to drop the ILOCK here in order to do directory lookups. | |
292 | */ | |
293 | sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
294 | xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL); | |
295 | ||
296 | /* Look up '..' */ | |
297 | error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL); | |
298 | if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) | |
299 | goto out; | |
300 | if (!xfs_verify_dir_ino(mp, dnum)) { | |
301 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
302 | goto out; | |
303 | } | |
304 | ||
305 | /* Is this the root dir? Then '..' must point to itself. */ | |
306 | if (sc->ip == mp->m_rootip) { | |
307 | if (sc->ip->i_ino != mp->m_sb.sb_rootino || | |
308 | sc->ip->i_ino != dnum) | |
309 | xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); | |
310 | goto out; | |
311 | } | |
312 | ||
313 | do { | |
314 | error = xfs_scrub_parent_validate(sc, dnum, &try_again); | |
315 | if (error) | |
316 | goto out; | |
317 | } while (try_again && ++tries < 20); | |
318 | ||
319 | /* | |
320 | * We gave it our best shot but failed, so mark this scrub | |
321 | * incomplete. Userspace can decide if it wants to try again. | |
322 | */ | |
323 | if (try_again && tries == 20) | |
324 | xfs_scrub_set_incomplete(sc); | |
325 | out: | |
326 | return error; | |
327 | } |