Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / fs / xfs / scrub / dabtree.c
1 /*
2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
28 #include "xfs_bit.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
31 #include "xfs_sb.h"
32 #include "xfs_inode.h"
33 #include "xfs_inode_fork.h"
34 #include "xfs_da_format.h"
35 #include "xfs_da_btree.h"
36 #include "xfs_dir2.h"
37 #include "xfs_dir2_priv.h"
38 #include "xfs_attr_leaf.h"
39 #include "scrub/xfs_scrub.h"
40 #include "scrub/scrub.h"
41 #include "scrub/common.h"
42 #include "scrub/trace.h"
43 #include "scrub/dabtree.h"
44
45 /* Directory/Attribute Btree */
46
47 /*
48  * Check for da btree operation errors.  See the section about handling
49  * operational errors in common.c.
50  */
51 bool
52 xfs_scrub_da_process_error(
53         struct xfs_scrub_da_btree       *ds,
54         int                             level,
55         int                             *error)
56 {
57         struct xfs_scrub_context        *sc = ds->sc;
58
59         if (*error == 0)
60                 return true;
61
62         switch (*error) {
63         case -EDEADLOCK:
64                 /* Used to restart an op with deadlock avoidance. */
65                 trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, *error);
66                 break;
67         case -EFSBADCRC:
68         case -EFSCORRUPTED:
69                 /* Note the badness but don't abort. */
70                 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
71                 *error = 0;
72                 /* fall through */
73         default:
74                 trace_xfs_scrub_file_op_error(sc, ds->dargs.whichfork,
75                                 xfs_dir2_da_to_db(ds->dargs.geo,
76                                         ds->state->path.blk[level].blkno),
77                                 *error, __return_address);
78                 break;
79         }
80         return false;
81 }
82
83 /*
84  * Check for da btree corruption.  See the section about handling
85  * operational errors in common.c.
86  */
87 void
88 xfs_scrub_da_set_corrupt(
89         struct xfs_scrub_da_btree       *ds,
90         int                             level)
91 {
92         struct xfs_scrub_context        *sc = ds->sc;
93
94         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
95
96         trace_xfs_scrub_fblock_error(sc, ds->dargs.whichfork,
97                         xfs_dir2_da_to_db(ds->dargs.geo,
98                                 ds->state->path.blk[level].blkno),
99                         __return_address);
100 }
101
102 /* Find an entry at a certain level in a da btree. */
103 STATIC void *
104 xfs_scrub_da_btree_entry(
105         struct xfs_scrub_da_btree       *ds,
106         int                             level,
107         int                             rec)
108 {
109         char                            *ents;
110         struct xfs_da_state_blk         *blk;
111         void                            *baddr;
112
113         /* Dispatch the entry finding function. */
114         blk = &ds->state->path.blk[level];
115         baddr = blk->bp->b_addr;
116         switch (blk->magic) {
117         case XFS_ATTR_LEAF_MAGIC:
118         case XFS_ATTR3_LEAF_MAGIC:
119                 ents = (char *)xfs_attr3_leaf_entryp(baddr);
120                 return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
121         case XFS_DIR2_LEAFN_MAGIC:
122         case XFS_DIR3_LEAFN_MAGIC:
123                 ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
124                 return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
125         case XFS_DIR2_LEAF1_MAGIC:
126         case XFS_DIR3_LEAF1_MAGIC:
127                 ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
128                 return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
129         case XFS_DA_NODE_MAGIC:
130         case XFS_DA3_NODE_MAGIC:
131                 ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
132                 return ents + (rec * sizeof(struct xfs_da_node_entry));
133         }
134
135         return NULL;
136 }
137
138 /* Scrub a da btree hash (key). */
139 int
140 xfs_scrub_da_btree_hash(
141         struct xfs_scrub_da_btree       *ds,
142         int                             level,
143         __be32                          *hashp)
144 {
145         struct xfs_da_state_blk         *blks;
146         struct xfs_da_node_entry        *entry;
147         xfs_dahash_t                    hash;
148         xfs_dahash_t                    parent_hash;
149
150         /* Is this hash in order? */
151         hash = be32_to_cpu(*hashp);
152         if (hash < ds->hashes[level])
153                 xfs_scrub_da_set_corrupt(ds, level);
154         ds->hashes[level] = hash;
155
156         if (level == 0)
157                 return 0;
158
159         /* Is this hash no larger than the parent hash? */
160         blks = ds->state->path.blk;
161         entry = xfs_scrub_da_btree_entry(ds, level - 1, blks[level - 1].index);
162         parent_hash = be32_to_cpu(entry->hashval);
163         if (parent_hash < hash)
164                 xfs_scrub_da_set_corrupt(ds, level);
165
166         return 0;
167 }
168
169 /*
170  * Check a da btree pointer.  Returns true if it's ok to use this
171  * pointer.
172  */
173 STATIC bool
174 xfs_scrub_da_btree_ptr_ok(
175         struct xfs_scrub_da_btree       *ds,
176         int                             level,
177         xfs_dablk_t                     blkno)
178 {
179         if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
180                 xfs_scrub_da_set_corrupt(ds, level);
181                 return false;
182         }
183
184         return true;
185 }
186
187 /*
188  * The da btree scrubber can handle leaf1 blocks as a degenerate
189  * form of leafn blocks.  Since the regular da code doesn't handle
190  * leaf1, we must multiplex the verifiers.
191  */
192 static void
193 xfs_scrub_da_btree_read_verify(
194         struct xfs_buf          *bp)
195 {
196         struct xfs_da_blkinfo   *info = bp->b_addr;
197
198         switch (be16_to_cpu(info->magic)) {
199         case XFS_DIR2_LEAF1_MAGIC:
200         case XFS_DIR3_LEAF1_MAGIC:
201                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
202                 bp->b_ops->verify_read(bp);
203                 return;
204         default:
205                 /*
206                  * xfs_da3_node_buf_ops already know how to handle
207                  * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
208                  */
209                 bp->b_ops = &xfs_da3_node_buf_ops;
210                 bp->b_ops->verify_read(bp);
211                 return;
212         }
213 }
214 static void
215 xfs_scrub_da_btree_write_verify(
216         struct xfs_buf          *bp)
217 {
218         struct xfs_da_blkinfo   *info = bp->b_addr;
219
220         switch (be16_to_cpu(info->magic)) {
221         case XFS_DIR2_LEAF1_MAGIC:
222         case XFS_DIR3_LEAF1_MAGIC:
223                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
224                 bp->b_ops->verify_write(bp);
225                 return;
226         default:
227                 /*
228                  * xfs_da3_node_buf_ops already know how to handle
229                  * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
230                  */
231                 bp->b_ops = &xfs_da3_node_buf_ops;
232                 bp->b_ops->verify_write(bp);
233                 return;
234         }
235 }
236 static void *
237 xfs_scrub_da_btree_verify(
238         struct xfs_buf          *bp)
239 {
240         struct xfs_da_blkinfo   *info = bp->b_addr;
241
242         switch (be16_to_cpu(info->magic)) {
243         case XFS_DIR2_LEAF1_MAGIC:
244         case XFS_DIR3_LEAF1_MAGIC:
245                 bp->b_ops = &xfs_dir3_leaf1_buf_ops;
246                 return bp->b_ops->verify_struct(bp);
247         default:
248                 bp->b_ops = &xfs_da3_node_buf_ops;
249                 return bp->b_ops->verify_struct(bp);
250         }
251 }
252
253 static const struct xfs_buf_ops xfs_scrub_da_btree_buf_ops = {
254         .name = "xfs_scrub_da_btree",
255         .verify_read = xfs_scrub_da_btree_read_verify,
256         .verify_write = xfs_scrub_da_btree_write_verify,
257         .verify_struct = xfs_scrub_da_btree_verify,
258 };
259
260 /* Check a block's sibling. */
261 STATIC int
262 xfs_scrub_da_btree_block_check_sibling(
263         struct xfs_scrub_da_btree       *ds,
264         int                             level,
265         int                             direction,
266         xfs_dablk_t                     sibling)
267 {
268         int                             retval;
269         int                             error;
270
271         memcpy(&ds->state->altpath, &ds->state->path,
272                         sizeof(ds->state->altpath));
273
274         /*
275          * If the pointer is null, we shouldn't be able to move the upper
276          * level pointer anywhere.
277          */
278         if (sibling == 0) {
279                 error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
280                                 direction, false, &retval);
281                 if (error == 0 && retval == 0)
282                         xfs_scrub_da_set_corrupt(ds, level);
283                 error = 0;
284                 goto out;
285         }
286
287         /* Move the alternate cursor one block in the direction given. */
288         error = xfs_da3_path_shift(ds->state, &ds->state->altpath,
289                         direction, false, &retval);
290         if (!xfs_scrub_da_process_error(ds, level, &error))
291                 return error;
292         if (retval) {
293                 xfs_scrub_da_set_corrupt(ds, level);
294                 return error;
295         }
296         if (ds->state->altpath.blk[level].bp)
297                 xfs_scrub_buffer_recheck(ds->sc,
298                                 ds->state->altpath.blk[level].bp);
299
300         /* Compare upper level pointer to sibling pointer. */
301         if (ds->state->altpath.blk[level].blkno != sibling)
302                 xfs_scrub_da_set_corrupt(ds, level);
303         xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp);
304 out:
305         return error;
306 }
307
308 /* Check a block's sibling pointers. */
309 STATIC int
310 xfs_scrub_da_btree_block_check_siblings(
311         struct xfs_scrub_da_btree       *ds,
312         int                             level,
313         struct xfs_da_blkinfo           *hdr)
314 {
315         xfs_dablk_t                     forw;
316         xfs_dablk_t                     back;
317         int                             error = 0;
318
319         forw = be32_to_cpu(hdr->forw);
320         back = be32_to_cpu(hdr->back);
321
322         /* Top level blocks should not have sibling pointers. */
323         if (level == 0) {
324                 if (forw != 0 || back != 0)
325                         xfs_scrub_da_set_corrupt(ds, level);
326                 return 0;
327         }
328
329         /*
330          * Check back (left) and forw (right) pointers.  These functions
331          * absorb error codes for us.
332          */
333         error = xfs_scrub_da_btree_block_check_sibling(ds, level, 0, back);
334         if (error)
335                 goto out;
336         error = xfs_scrub_da_btree_block_check_sibling(ds, level, 1, forw);
337
338 out:
339         memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
340         return error;
341 }
342
343 /* Load a dir/attribute block from a btree. */
344 STATIC int
345 xfs_scrub_da_btree_block(
346         struct xfs_scrub_da_btree       *ds,
347         int                             level,
348         xfs_dablk_t                     blkno)
349 {
350         struct xfs_da_state_blk         *blk;
351         struct xfs_da_intnode           *node;
352         struct xfs_da_node_entry        *btree;
353         struct xfs_da3_blkinfo          *hdr3;
354         struct xfs_da_args              *dargs = &ds->dargs;
355         struct xfs_inode                *ip = ds->dargs.dp;
356         xfs_ino_t                       owner;
357         int                             *pmaxrecs;
358         struct xfs_da3_icnode_hdr       nodehdr;
359         int                             error = 0;
360
361         blk = &ds->state->path.blk[level];
362         ds->state->path.active = level + 1;
363
364         /* Release old block. */
365         if (blk->bp) {
366                 xfs_trans_brelse(dargs->trans, blk->bp);
367                 blk->bp = NULL;
368         }
369
370         /* Check the pointer. */
371         blk->blkno = blkno;
372         if (!xfs_scrub_da_btree_ptr_ok(ds, level, blkno))
373                 goto out_nobuf;
374
375         /* Read the buffer. */
376         error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
377                         &blk->bp, dargs->whichfork,
378                         &xfs_scrub_da_btree_buf_ops);
379         if (!xfs_scrub_da_process_error(ds, level, &error))
380                 goto out_nobuf;
381         if (blk->bp)
382                 xfs_scrub_buffer_recheck(ds->sc, blk->bp);
383
384         /*
385          * We didn't find a dir btree root block, which means that
386          * there's no LEAF1/LEAFN tree (at least not where it's supposed
387          * to be), so jump out now.
388          */
389         if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
390                         blk->bp == NULL)
391                 goto out_nobuf;
392
393         /* It's /not/ ok for attr trees not to have a da btree. */
394         if (blk->bp == NULL) {
395                 xfs_scrub_da_set_corrupt(ds, level);
396                 goto out_nobuf;
397         }
398
399         hdr3 = blk->bp->b_addr;
400         blk->magic = be16_to_cpu(hdr3->hdr.magic);
401         pmaxrecs = &ds->maxrecs[level];
402
403         /* We only started zeroing the header on v5 filesystems. */
404         if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
405                 xfs_scrub_da_set_corrupt(ds, level);
406
407         /* Check the owner. */
408         if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
409                 owner = be64_to_cpu(hdr3->owner);
410                 if (owner != ip->i_ino)
411                         xfs_scrub_da_set_corrupt(ds, level);
412         }
413
414         /* Check the siblings. */
415         error = xfs_scrub_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
416         if (error)
417                 goto out;
418
419         /* Interpret the buffer. */
420         switch (blk->magic) {
421         case XFS_ATTR_LEAF_MAGIC:
422         case XFS_ATTR3_LEAF_MAGIC:
423                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
424                                 XFS_BLFT_ATTR_LEAF_BUF);
425                 blk->magic = XFS_ATTR_LEAF_MAGIC;
426                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
427                 if (ds->tree_level != 0)
428                         xfs_scrub_da_set_corrupt(ds, level);
429                 break;
430         case XFS_DIR2_LEAFN_MAGIC:
431         case XFS_DIR3_LEAFN_MAGIC:
432                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
433                                 XFS_BLFT_DIR_LEAFN_BUF);
434                 blk->magic = XFS_DIR2_LEAFN_MAGIC;
435                 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
436                 if (ds->tree_level != 0)
437                         xfs_scrub_da_set_corrupt(ds, level);
438                 break;
439         case XFS_DIR2_LEAF1_MAGIC:
440         case XFS_DIR3_LEAF1_MAGIC:
441                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
442                                 XFS_BLFT_DIR_LEAF1_BUF);
443                 blk->magic = XFS_DIR2_LEAF1_MAGIC;
444                 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
445                 if (ds->tree_level != 0)
446                         xfs_scrub_da_set_corrupt(ds, level);
447                 break;
448         case XFS_DA_NODE_MAGIC:
449         case XFS_DA3_NODE_MAGIC:
450                 xfs_trans_buf_set_type(dargs->trans, blk->bp,
451                                 XFS_BLFT_DA_NODE_BUF);
452                 blk->magic = XFS_DA_NODE_MAGIC;
453                 node = blk->bp->b_addr;
454                 ip->d_ops->node_hdr_from_disk(&nodehdr, node);
455                 btree = ip->d_ops->node_tree_p(node);
456                 *pmaxrecs = nodehdr.count;
457                 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
458                 if (level == 0) {
459                         if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
460                                 xfs_scrub_da_set_corrupt(ds, level);
461                                 goto out_freebp;
462                         }
463                         ds->tree_level = nodehdr.level;
464                 } else {
465                         if (ds->tree_level != nodehdr.level) {
466                                 xfs_scrub_da_set_corrupt(ds, level);
467                                 goto out_freebp;
468                         }
469                 }
470
471                 /* XXX: Check hdr3.pad32 once we know how to fix it. */
472                 break;
473         default:
474                 xfs_scrub_da_set_corrupt(ds, level);
475                 goto out_freebp;
476         }
477
478 out:
479         return error;
480 out_freebp:
481         xfs_trans_brelse(dargs->trans, blk->bp);
482         blk->bp = NULL;
483 out_nobuf:
484         blk->blkno = 0;
485         return error;
486 }
487
488 /* Visit all nodes and leaves of a da btree. */
489 int
490 xfs_scrub_da_btree(
491         struct xfs_scrub_context        *sc,
492         int                             whichfork,
493         xfs_scrub_da_btree_rec_fn       scrub_fn,
494         void                            *private)
495 {
496         struct xfs_scrub_da_btree       ds = {};
497         struct xfs_mount                *mp = sc->mp;
498         struct xfs_da_state_blk         *blks;
499         struct xfs_da_node_entry        *key;
500         void                            *rec;
501         xfs_dablk_t                     blkno;
502         int                             level;
503         int                             error;
504
505         /* Skip short format data structures; no btree to scan. */
506         if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
507             XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
508                 return 0;
509
510         /* Set up initial da state. */
511         ds.dargs.dp = sc->ip;
512         ds.dargs.whichfork = whichfork;
513         ds.dargs.trans = sc->tp;
514         ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
515         ds.state = xfs_da_state_alloc();
516         ds.state->args = &ds.dargs;
517         ds.state->mp = mp;
518         ds.sc = sc;
519         ds.private = private;
520         if (whichfork == XFS_ATTR_FORK) {
521                 ds.dargs.geo = mp->m_attr_geo;
522                 ds.lowest = 0;
523                 ds.highest = 0;
524         } else {
525                 ds.dargs.geo = mp->m_dir_geo;
526                 ds.lowest = ds.dargs.geo->leafblk;
527                 ds.highest = ds.dargs.geo->freeblk;
528         }
529         blkno = ds.lowest;
530         level = 0;
531
532         /* Find the root of the da tree, if present. */
533         blks = ds.state->path.blk;
534         error = xfs_scrub_da_btree_block(&ds, level, blkno);
535         if (error)
536                 goto out_state;
537         /*
538          * We didn't find a block at ds.lowest, which means that there's
539          * no LEAF1/LEAFN tree (at least not where it's supposed to be),
540          * so jump out now.
541          */
542         if (blks[level].bp == NULL)
543                 goto out_state;
544
545         blks[level].index = 0;
546         while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
547                 /* Handle leaf block. */
548                 if (blks[level].magic != XFS_DA_NODE_MAGIC) {
549                         /* End of leaf, pop back towards the root. */
550                         if (blks[level].index >= ds.maxrecs[level]) {
551                                 if (level > 0)
552                                         blks[level - 1].index++;
553                                 ds.tree_level++;
554                                 level--;
555                                 continue;
556                         }
557
558                         /* Dispatch record scrubbing. */
559                         rec = xfs_scrub_da_btree_entry(&ds, level,
560                                         blks[level].index);
561                         error = scrub_fn(&ds, level, rec);
562                         if (error)
563                                 break;
564                         if (xfs_scrub_should_terminate(sc, &error) ||
565                             (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
566                                 break;
567
568                         blks[level].index++;
569                         continue;
570                 }
571
572
573                 /* End of node, pop back towards the root. */
574                 if (blks[level].index >= ds.maxrecs[level]) {
575                         if (level > 0)
576                                 blks[level - 1].index++;
577                         ds.tree_level++;
578                         level--;
579                         continue;
580                 }
581
582                 /* Hashes in order for scrub? */
583                 key = xfs_scrub_da_btree_entry(&ds, level, blks[level].index);
584                 error = xfs_scrub_da_btree_hash(&ds, level, &key->hashval);
585                 if (error)
586                         goto out;
587
588                 /* Drill another level deeper. */
589                 blkno = be32_to_cpu(key->before);
590                 level++;
591                 ds.tree_level--;
592                 error = xfs_scrub_da_btree_block(&ds, level, blkno);
593                 if (error)
594                         goto out;
595                 if (blks[level].bp == NULL)
596                         goto out;
597
598                 blks[level].index = 0;
599         }
600
601 out:
602         /* Release all the buffers we're tracking. */
603         for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
604                 if (blks[level].bp == NULL)
605                         continue;
606                 xfs_trans_brelse(sc->tp, blks[level].bp);
607                 blks[level].bp = NULL;
608         }
609
610 out_state:
611         xfs_da_state_free(ds.state);
612         return error;
613 }