| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "bcachefs.h" |
| 3 | #include "btree_cache.h" |
| 4 | #include "btree_iter.h" |
| 5 | #include "error.h" |
| 6 | #include "journal.h" |
| 7 | #include "namei.h" |
| 8 | #include "recovery_passes.h" |
| 9 | #include "super.h" |
| 10 | #include "thread_with_file.h" |
| 11 | |
/*
 * Per-error-ID threshold used by count_fsck_err_locked(): when the
 * ratelimit_errors option is set and this many distinct messages have already
 * been counted for one error ID, the next one is flagged so a "ratelimiting"
 * notice is emitted, and later ones are no longer printed.
 */
#define FSCK_ERR_RATELIMIT_NR 10
| 13 | |
/*
 * Prepare @out for a log message: add an indent of 2 via
 * printbuf_indent_add_nextline() and, only when BCACHEFS_LOG_PREFIX is
 * defined, emit the "bcachefs (<fs_or_dev_name>): " prefix.
 */
void __bch2_log_msg_start(const char *fs_or_dev_name, struct printbuf *out)
{
	const unsigned indent = 2;

	printbuf_indent_add_nextline(out, indent);
#if defined(BCACHEFS_LOG_PREFIX)
	prt_printf(out, "bcachefs (%s): ", fs_or_dev_name);
#endif
}
| 22 | |
| 23 | bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) |
| 24 | { |
| 25 | set_bit(BCH_FS_error, &c->flags); |
| 26 | |
| 27 | switch (c->opts.errors) { |
| 28 | case BCH_ON_ERROR_continue: |
| 29 | return false; |
| 30 | case BCH_ON_ERROR_fix_safe: |
| 31 | case BCH_ON_ERROR_ro: |
| 32 | bch2_fs_emergency_read_only2(c, out); |
| 33 | return true; |
| 34 | case BCH_ON_ERROR_panic: |
| 35 | bch2_print_str(c, KERN_ERR, out->buf); |
| 36 | panic(bch2_fmt(c, "panic after error")); |
| 37 | return true; |
| 38 | default: |
| 39 | BUG(); |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | bool bch2_inconsistent_error(struct bch_fs *c) |
| 44 | { |
| 45 | struct printbuf buf = PRINTBUF; |
| 46 | buf.atomic++; |
| 47 | |
| 48 | printbuf_indent_add_nextline(&buf, 2); |
| 49 | |
| 50 | bool ret = __bch2_inconsistent_error(c, &buf); |
| 51 | if (ret) |
| 52 | bch_err(c, "%s", buf.buf); |
| 53 | printbuf_exit(&buf); |
| 54 | return ret; |
| 55 | } |
| 56 | |
| 57 | __printf(3, 0) |
| 58 | static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, |
| 59 | const char *fmt, va_list args) |
| 60 | { |
| 61 | struct printbuf buf = PRINTBUF; |
| 62 | buf.atomic++; |
| 63 | |
| 64 | bch2_log_msg_start(c, &buf); |
| 65 | |
| 66 | prt_vprintf(&buf, fmt, args); |
| 67 | prt_newline(&buf); |
| 68 | |
| 69 | if (trans) |
| 70 | bch2_trans_updates_to_text(&buf, trans); |
| 71 | bool ret = __bch2_inconsistent_error(c, &buf); |
| 72 | bch2_print_str(c, KERN_ERR, buf.buf); |
| 73 | |
| 74 | printbuf_exit(&buf); |
| 75 | return ret; |
| 76 | } |
| 77 | |
/*
 * Report a filesystem inconsistency with a printf-style message, without a
 * btree transaction. Returns the value of bch2_fs_trans_inconsistent().
 */
bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...)
{
	va_list ap;
	bool acted;

	va_start(ap, fmt);
	acted = bch2_fs_trans_inconsistent(c, NULL, fmt, ap);
	va_end(ap);

	return acted;
}
| 86 | |
| 87 | bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...) |
| 88 | { |
| 89 | va_list args; |
| 90 | va_start(args, fmt); |
| 91 | bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args); |
| 92 | va_end(args); |
| 93 | return ret; |
| 94 | } |
| 95 | |
| 96 | int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) |
| 97 | { |
| 98 | prt_printf(out, "btree topology error: "); |
| 99 | |
| 100 | set_bit(BCH_FS_topology_error, &c->flags); |
| 101 | if (!test_bit(BCH_FS_in_recovery, &c->flags)) { |
| 102 | __bch2_inconsistent_error(c, out); |
| 103 | return bch_err_throw(c, btree_need_topology_repair); |
| 104 | } else { |
| 105 | return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?: |
| 106 | bch_err_throw(c, btree_need_topology_repair); |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) |
| 111 | { |
| 112 | struct printbuf buf = PRINTBUF; |
| 113 | |
| 114 | bch2_log_msg_start(c, &buf); |
| 115 | |
| 116 | va_list args; |
| 117 | va_start(args, fmt); |
| 118 | prt_vprintf(&buf, fmt, args); |
| 119 | va_end(args); |
| 120 | |
| 121 | int ret = __bch2_topology_error(c, &buf); |
| 122 | bch2_print_str(c, KERN_ERR, buf.buf); |
| 123 | |
| 124 | printbuf_exit(&buf); |
| 125 | return ret; |
| 126 | } |
| 127 | |
/*
 * Force the filesystem into emergency read-only mode; a message is logged
 * only when bch2_fs_emergency_read_only() returns true.
 */
void bch2_fatal_error(struct bch_fs *c)
{
	if (!bch2_fs_emergency_read_only(c))
		return;

	bch_err(c, "fatal error - emergency read only");
}
| 133 | |
| 134 | void bch2_io_error_work(struct work_struct *work) |
| 135 | { |
| 136 | struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); |
| 137 | struct bch_fs *c = ca->fs; |
| 138 | |
| 139 | /* XXX: if it's reads or checksums that are failing, set it to failed */ |
| 140 | |
| 141 | down_write(&c->state_lock); |
| 142 | unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); |
| 143 | |
| 144 | if (write_errors_start && |
| 145 | time_after(jiffies, |
| 146 | write_errors_start + c->opts.write_error_timeout * HZ)) { |
| 147 | if (ca->mi.state >= BCH_MEMBER_STATE_ro) |
| 148 | goto out; |
| 149 | |
| 150 | bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, |
| 151 | BCH_FORCE_IF_DEGRADED); |
| 152 | struct printbuf buf = PRINTBUF; |
| 153 | __bch2_log_msg_start(ca->name, &buf); |
| 154 | |
| 155 | prt_printf(&buf, "writes erroring for %u seconds, setting %s ro", |
| 156 | c->opts.write_error_timeout, |
| 157 | dev ? "device" : "filesystem"); |
| 158 | if (!dev) |
| 159 | bch2_fs_emergency_read_only2(c, &buf); |
| 160 | |
| 161 | bch2_print_str(c, KERN_ERR, buf.buf); |
| 162 | printbuf_exit(&buf); |
| 163 | } |
| 164 | out: |
| 165 | up_write(&c->state_lock); |
| 166 | } |
| 167 | |
| 168 | void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) |
| 169 | { |
| 170 | atomic64_inc(&ca->errors[type]); |
| 171 | |
| 172 | if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start) |
| 173 | ca->write_errors_start = jiffies; |
| 174 | |
| 175 | queue_work(system_long_wq, &ca->io_error_work); |
| 176 | } |
| 177 | |
/*
 * Answers to an fsck prompt.
 *
 * The numeric values matter: __bch2_fsck_err() tests bit 0 to tell "yes" from
 * "no", and compares against YN_ALLNO to detect the "for all errors of this
 * type" variants.
 */
enum ask_yn {
	YN_NO		= 0,	/* 'n' */
	YN_YES		= 1,	/* 'y' */
	YN_ALLNO	= 2,	/* 'N' */
	YN_ALLYES	= 3,	/* 'Y' */
};
| 184 | |
| 185 | static enum ask_yn parse_yn_response(char *buf) |
| 186 | { |
| 187 | buf = strim(buf); |
| 188 | |
| 189 | if (strlen(buf) == 1) |
| 190 | switch (buf[0]) { |
| 191 | case 'n': |
| 192 | return YN_NO; |
| 193 | case 'y': |
| 194 | return YN_YES; |
| 195 | case 'N': |
| 196 | return YN_ALLNO; |
| 197 | case 'Y': |
| 198 | return YN_ALLYES; |
| 199 | } |
| 200 | return -1; |
| 201 | } |
| 202 | |
| 203 | #ifdef __KERNEL__ |
/*
 * Kernel-side fsck prompt: print the y/n prompt and read one answer from the
 * filesystem's stdio redirect.
 *
 * Returns YN_NO without prompting when there is no stdio redirect, or when
 * c->stdio_filter is set and is not the current task. Also returns YN_NO if
 * reading a line fails. Otherwise keeps prompting until parse_yn_response()
 * accepts the reply.
 *
 * If @trans is non-NULL it is unlocked before waiting; the caller is
 * responsible for relocking it.
 */
static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)
{
	struct stdio_redirect *stdio = c->stdio;

	/* A filter naming a different task means this task must not prompt */
	if (c->stdio_filter && c->stdio_filter != current)
		stdio = NULL;

	if (!stdio)
		return YN_NO;

	if (trans)
		bch2_trans_unlock(trans);

	/*
	 * With a transaction: wait up to two seconds first, then switch to
	 * bch2_trans_unlock_long() and wait without a timeout. 0 means "no
	 * deadline" (no transaction, or the long unlock already happened).
	 */
	unsigned long unlock_long_at = trans ? jiffies + HZ * 2 : 0;
	darray_char line = {};
	int ret;

	do {
		unsigned long t;
		bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
rewait:
		/* Remaining time until the deadline, clamped at 0 */
		t = unlock_long_at
			? max_t(long, unlock_long_at - jiffies, 0)
			: MAX_SCHEDULE_TIMEOUT;

		int r = bch2_stdio_redirect_readline_timeout(stdio, &line, t);
		if (r == -ETIME) {
			/* Only reachable while unlock_long_at != 0, i.e. trans != NULL */
			bch2_trans_unlock_long(trans);
			unlock_long_at = 0;
			goto rewait;
		}

		if (r < 0) {
			ret = YN_NO;
			break;
		}

		/* Overwrite the last byte read (presumably the newline) with a terminator */
		darray_last(line) = '\0';
	} while ((ret = parse_yn_response(line.data)) < 0);

	darray_exit(&line);
	return ret;
}
| 247 | #else |
| 248 | |
| 249 | #include "tools-util.h" |
| 250 | |
| 251 | static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) |
| 252 | { |
| 253 | char *buf = NULL; |
| 254 | size_t buflen = 0; |
| 255 | int ret; |
| 256 | |
| 257 | do { |
| 258 | fputs(" (y,n, or Y,N for all errors of this type) ", stdout); |
| 259 | fflush(stdout); |
| 260 | |
| 261 | if (getline(&buf, &buflen, stdin) < 0) |
| 262 | die("error reading from standard input"); |
| 263 | } while ((ret = parse_yn_response(buf)) < 0); |
| 264 | |
| 265 | free(buf); |
| 266 | return ret; |
| 267 | } |
| 268 | |
| 269 | #endif |
| 270 | |
| 271 | static struct fsck_err_state *fsck_err_get(struct bch_fs *c, |
| 272 | enum bch_sb_error_id id) |
| 273 | { |
| 274 | struct fsck_err_state *s; |
| 275 | |
| 276 | list_for_each_entry(s, &c->fsck_error_msgs, list) |
| 277 | if (s->id == id) { |
| 278 | /* |
| 279 | * move it to the head of the list: repeated fsck errors |
| 280 | * are common |
| 281 | */ |
| 282 | list_move(&s->list, &c->fsck_error_msgs); |
| 283 | return s; |
| 284 | } |
| 285 | |
| 286 | s = kzalloc(sizeof(*s), GFP_NOFS); |
| 287 | if (!s) { |
| 288 | if (!c->fsck_alloc_msgs_err) |
| 289 | bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); |
| 290 | c->fsck_alloc_msgs_err = true; |
| 291 | return NULL; |
| 292 | } |
| 293 | |
| 294 | INIT_LIST_HEAD(&s->list); |
| 295 | s->id = id; |
| 296 | list_add(&s->list, &c->fsck_error_msgs); |
| 297 | return s; |
| 298 | } |
| 299 | |
/*
 * Print the "-ing" form of an action prompt:
 * s/fix?/fixing/ s/recreate?/recreating/
 *
 * @action must end in '?' and contain at least one character before it. The
 * '?' is dropped, then a single trailing 'e' (if any), and "ing" is appended.
 *
 * Malformed input is a programming error and hits BUG_ON(). The length check
 * is what keeps an empty string or a bare "?" from indexing before the start
 * of @action (and, for the empty string, from wrapping the unsigned length
 * that is then passed to prt_bytes()).
 */
static void prt_actioning(struct printbuf *out, const char *action)
{
	unsigned len = strlen(action);

	BUG_ON(len < 2 || action[len - 1] != '?');
	--len;

	/* len >= 1 here, so action[len - 1] is in bounds */
	if (action[len - 1] == 'e')
		--len;

	prt_bytes(out, action, len);
	prt_str(out, "ing");
}
| 314 | |
/*
 * Extra fsck flags per error ID, generated from the BCH_SB_ERRS() x-macro:
 * entry [BCH_FSCK_ERR_<name>] holds that error's third x() argument. Callers
 * in this file OR these into the flags they were passed, after bounds-checking
 * the error ID against ARRAY_SIZE().
 */
static const u8 fsck_flags_extra[] = {
#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
	BCH_SB_ERRS()
#undef x
};
| 320 | |
| 321 | static int do_fsck_ask_yn(struct bch_fs *c, |
| 322 | struct btree_trans *trans, |
| 323 | struct printbuf *question, |
| 324 | const char *action) |
| 325 | { |
| 326 | prt_str(question, ", "); |
| 327 | prt_str(question, action); |
| 328 | |
| 329 | if (bch2_fs_stdio_redirect(c)) |
| 330 | bch2_print(c, "%s", question->buf); |
| 331 | else |
| 332 | bch2_print_str(c, KERN_ERR, question->buf); |
| 333 | |
| 334 | int ask = bch2_fsck_ask_yn(c, trans); |
| 335 | |
| 336 | if (trans) { |
| 337 | int ret = bch2_trans_relock(trans); |
| 338 | if (ret) |
| 339 | return ret; |
| 340 | } |
| 341 | |
| 342 | return ask; |
| 343 | } |
| 344 | |
| 345 | static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, |
| 346 | enum bch_sb_error_id id, const char *msg, |
| 347 | bool *repeat, bool *print, bool *suppress) |
| 348 | { |
| 349 | bch2_sb_error_count(c, id); |
| 350 | |
| 351 | struct fsck_err_state *s = fsck_err_get(c, id); |
| 352 | if (s) { |
| 353 | /* |
| 354 | * We may be called multiple times for the same error on |
| 355 | * transaction restart - this memoizes instead of asking the user |
| 356 | * multiple times for the same error: |
| 357 | */ |
| 358 | if (s->last_msg && !strcmp(msg, s->last_msg)) { |
| 359 | *repeat = true; |
| 360 | *print = false; |
| 361 | return s; |
| 362 | } |
| 363 | |
| 364 | kfree(s->last_msg); |
| 365 | s->last_msg = kstrdup(msg, GFP_KERNEL); |
| 366 | |
| 367 | if (c->opts.ratelimit_errors && |
| 368 | s->nr >= FSCK_ERR_RATELIMIT_NR) { |
| 369 | if (s->nr == FSCK_ERR_RATELIMIT_NR) |
| 370 | *suppress = true; |
| 371 | else |
| 372 | *print = false; |
| 373 | } |
| 374 | |
| 375 | s->nr++; |
| 376 | } |
| 377 | return s; |
| 378 | } |
| 379 | |
| 380 | bool __bch2_count_fsck_err(struct bch_fs *c, |
| 381 | enum bch_sb_error_id id, struct printbuf *msg) |
| 382 | { |
| 383 | bch2_sb_error_count(c, id); |
| 384 | |
| 385 | mutex_lock(&c->fsck_error_msgs_lock); |
| 386 | bool print = true, repeat = false, suppress = false; |
| 387 | |
| 388 | count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); |
| 389 | mutex_unlock(&c->fsck_error_msgs_lock); |
| 390 | |
| 391 | if (suppress) |
| 392 | prt_printf(msg, "Ratelimiting new instances of previous error\n"); |
| 393 | |
| 394 | return print && !repeat; |
| 395 | } |
| 396 | |
| 397 | int bch2_fsck_err_opt(struct bch_fs *c, |
| 398 | enum bch_fsck_flags flags, |
| 399 | enum bch_sb_error_id err) |
| 400 | { |
| 401 | if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) |
| 402 | flags |= fsck_flags_extra[err]; |
| 403 | |
| 404 | if (test_bit(BCH_FS_in_fsck, &c->flags)) { |
| 405 | if (!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) |
| 406 | return bch_err_throw(c, fsck_repair_unimplemented); |
| 407 | |
| 408 | switch (c->opts.fix_errors) { |
| 409 | case FSCK_FIX_exit: |
| 410 | return bch_err_throw(c, fsck_errors_not_fixed); |
| 411 | case FSCK_FIX_yes: |
| 412 | if (flags & FSCK_CAN_FIX) |
| 413 | return bch_err_throw(c, fsck_fix); |
| 414 | fallthrough; |
| 415 | case FSCK_FIX_no: |
| 416 | if (flags & FSCK_CAN_IGNORE) |
| 417 | return bch_err_throw(c, fsck_ignore); |
| 418 | return bch_err_throw(c, fsck_errors_not_fixed); |
| 419 | case FSCK_FIX_ask: |
| 420 | if (flags & FSCK_AUTOFIX) |
| 421 | return bch_err_throw(c, fsck_fix); |
| 422 | return bch_err_throw(c, fsck_ask); |
| 423 | default: |
| 424 | BUG(); |
| 425 | } |
| 426 | } else { |
| 427 | if ((flags & FSCK_AUTOFIX) && |
| 428 | (c->opts.errors == BCH_ON_ERROR_continue || |
| 429 | c->opts.errors == BCH_ON_ERROR_fix_safe)) |
| 430 | return bch_err_throw(c, fsck_fix); |
| 431 | |
| 432 | if (c->opts.errors == BCH_ON_ERROR_continue && |
| 433 | (flags & FSCK_CAN_IGNORE)) |
| 434 | return bch_err_throw(c, fsck_ignore); |
| 435 | return bch_err_throw(c, fsck_errors_not_fixed); |
| 436 | } |
| 437 | } |
| 438 | |
| 439 | int __bch2_fsck_err(struct bch_fs *c, |
| 440 | struct btree_trans *trans, |
| 441 | enum bch_fsck_flags flags, |
| 442 | enum bch_sb_error_id err, |
| 443 | const char *fmt, ...) |
| 444 | { |
| 445 | va_list args; |
| 446 | struct printbuf buf = PRINTBUF, *out = &buf; |
| 447 | int ret = 0; |
| 448 | const char *action_orig = "fix?", *action = action_orig; |
| 449 | |
| 450 | might_sleep(); |
| 451 | |
| 452 | if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) |
| 453 | flags |= fsck_flags_extra[err]; |
| 454 | |
| 455 | if (!c) |
| 456 | c = trans->c; |
| 457 | |
| 458 | /* |
| 459 | * Ugly: if there's a transaction in the current task it has to be |
| 460 | * passed in to unlock if we prompt for user input. |
| 461 | * |
| 462 | * But, plumbing a transaction and transaction restarts into |
| 463 | * bkey_validate() is problematic. |
| 464 | * |
| 465 | * So: |
| 466 | * - make all bkey errors AUTOFIX, they're simple anyways (we just |
| 467 | * delete the key) |
| 468 | * - and we don't need to warn if we're not prompting |
| 469 | */ |
| 470 | WARN_ON((flags & FSCK_CAN_FIX) && |
| 471 | !(flags & FSCK_AUTOFIX) && |
| 472 | !trans && |
| 473 | bch2_current_has_btree_trans(c)); |
| 474 | |
| 475 | if (test_bit(err, c->sb.errors_silent)) |
| 476 | return flags & FSCK_CAN_FIX |
| 477 | ? bch_err_throw(c, fsck_fix) |
| 478 | : bch_err_throw(c, fsck_ignore); |
| 479 | |
| 480 | printbuf_indent_add_nextline(out, 2); |
| 481 | |
| 482 | #ifdef BCACHEFS_LOG_PREFIX |
| 483 | if (strncmp(fmt, "bcachefs", 8)) |
| 484 | prt_printf(out, bch2_log_msg(c, "")); |
| 485 | #endif |
| 486 | |
| 487 | va_start(args, fmt); |
| 488 | prt_vprintf(out, fmt, args); |
| 489 | va_end(args); |
| 490 | |
| 491 | /* Custom fix/continue/recreate/etc.? */ |
| 492 | if (out->buf[out->pos - 1] == '?') { |
| 493 | const char *p = strrchr(out->buf, ','); |
| 494 | if (p) { |
| 495 | out->pos = p - out->buf; |
| 496 | action = kstrdup(p + 2, GFP_KERNEL); |
| 497 | if (!action) { |
| 498 | ret = -ENOMEM; |
| 499 | goto err; |
| 500 | } |
| 501 | } |
| 502 | } |
| 503 | |
| 504 | mutex_lock(&c->fsck_error_msgs_lock); |
| 505 | bool repeat = false, print = true, suppress = false; |
| 506 | bool inconsistent = false, exiting = false; |
| 507 | struct fsck_err_state *s = |
| 508 | count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress); |
| 509 | if (repeat) { |
| 510 | ret = s->ret; |
| 511 | goto err_unlock; |
| 512 | } |
| 513 | |
| 514 | if ((flags & FSCK_AUTOFIX) && |
| 515 | (c->opts.errors == BCH_ON_ERROR_continue || |
| 516 | c->opts.errors == BCH_ON_ERROR_fix_safe)) { |
| 517 | prt_str(out, ", "); |
| 518 | if (flags & FSCK_CAN_FIX) { |
| 519 | prt_actioning(out, action); |
| 520 | ret = bch_err_throw(c, fsck_fix); |
| 521 | } else { |
| 522 | prt_str(out, ", continuing"); |
| 523 | ret = bch_err_throw(c, fsck_ignore); |
| 524 | } |
| 525 | |
| 526 | goto print; |
| 527 | } else if (!test_bit(BCH_FS_in_fsck, &c->flags)) { |
| 528 | if (c->opts.errors != BCH_ON_ERROR_continue || |
| 529 | !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { |
| 530 | prt_str_indented(out, ", shutting down\n" |
| 531 | "error not marked as autofix and not in fsck\n" |
| 532 | "run fsck, and forward to devs so error can be marked for self-healing"); |
| 533 | inconsistent = true; |
| 534 | print = true; |
| 535 | ret = bch_err_throw(c, fsck_errors_not_fixed); |
| 536 | } else if (flags & FSCK_CAN_FIX) { |
| 537 | prt_str(out, ", "); |
| 538 | prt_actioning(out, action); |
| 539 | ret = bch_err_throw(c, fsck_fix); |
| 540 | } else { |
| 541 | prt_str(out, ", continuing"); |
| 542 | ret = bch_err_throw(c, fsck_ignore); |
| 543 | } |
| 544 | } else if (c->opts.fix_errors == FSCK_FIX_exit) { |
| 545 | prt_str(out, ", exiting"); |
| 546 | ret = bch_err_throw(c, fsck_errors_not_fixed); |
| 547 | } else if (flags & FSCK_CAN_FIX) { |
| 548 | int fix = s && s->fix |
| 549 | ? s->fix |
| 550 | : c->opts.fix_errors; |
| 551 | |
| 552 | if (fix == FSCK_FIX_ask) { |
| 553 | print = false; |
| 554 | |
| 555 | ret = do_fsck_ask_yn(c, trans, out, action); |
| 556 | if (ret < 0) |
| 557 | goto err_unlock; |
| 558 | |
| 559 | if (ret >= YN_ALLNO && s) |
| 560 | s->fix = ret == YN_ALLNO |
| 561 | ? FSCK_FIX_no |
| 562 | : FSCK_FIX_yes; |
| 563 | |
| 564 | ret = ret & 1 |
| 565 | ? bch_err_throw(c, fsck_fix) |
| 566 | : bch_err_throw(c, fsck_ignore); |
| 567 | } else if (fix == FSCK_FIX_yes || |
| 568 | (c->opts.nochanges && |
| 569 | !(flags & FSCK_CAN_IGNORE))) { |
| 570 | prt_str(out, ", "); |
| 571 | prt_actioning(out, action); |
| 572 | ret = bch_err_throw(c, fsck_fix); |
| 573 | } else { |
| 574 | prt_str(out, ", not "); |
| 575 | prt_actioning(out, action); |
| 576 | ret = bch_err_throw(c, fsck_ignore); |
| 577 | } |
| 578 | } else { |
| 579 | if (flags & FSCK_CAN_IGNORE) { |
| 580 | prt_str(out, ", continuing"); |
| 581 | ret = bch_err_throw(c, fsck_ignore); |
| 582 | } else { |
| 583 | prt_str(out, " (repair unimplemented)"); |
| 584 | ret = bch_err_throw(c, fsck_repair_unimplemented); |
| 585 | } |
| 586 | } |
| 587 | |
| 588 | if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) && |
| 589 | (c->opts.fix_errors == FSCK_FIX_exit || |
| 590 | !(flags & FSCK_CAN_IGNORE))) |
| 591 | ret = bch_err_throw(c, fsck_errors_not_fixed); |
| 592 | |
| 593 | if (test_bit(BCH_FS_in_fsck, &c->flags) && |
| 594 | (!bch2_err_matches(ret, BCH_ERR_fsck_fix) && |
| 595 | !bch2_err_matches(ret, BCH_ERR_fsck_ignore))) { |
| 596 | exiting = true; |
| 597 | print = true; |
| 598 | } |
| 599 | print: |
| 600 | prt_newline(out); |
| 601 | |
| 602 | if (inconsistent) |
| 603 | __bch2_inconsistent_error(c, out); |
| 604 | else if (exiting) |
| 605 | prt_printf(out, "Unable to continue, halting\n"); |
| 606 | else if (suppress) |
| 607 | prt_printf(out, "Ratelimiting new instances of previous error\n"); |
| 608 | |
| 609 | if (print) { |
| 610 | /* possibly strip an empty line, from printbuf_indent_add */ |
| 611 | while (out->pos && out->buf[out->pos - 1] == ' ') |
| 612 | --out->pos; |
| 613 | printbuf_nul_terminate(out); |
| 614 | |
| 615 | if (bch2_fs_stdio_redirect(c)) |
| 616 | bch2_print(c, "%s", out->buf); |
| 617 | else |
| 618 | bch2_print_str(c, KERN_ERR, out->buf); |
| 619 | } |
| 620 | |
| 621 | if (s) |
| 622 | s->ret = ret; |
| 623 | |
| 624 | if (trans && |
| 625 | !(flags & FSCK_ERR_NO_LOG) && |
| 626 | ret == -BCH_ERR_fsck_fix) |
| 627 | ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; |
| 628 | err_unlock: |
| 629 | mutex_unlock(&c->fsck_error_msgs_lock); |
| 630 | err: |
| 631 | /* |
| 632 | * We don't yet track whether the filesystem currently has errors, for |
| 633 | * log_fsck_err()s: that would require us to track for every error type |
| 634 | * which recovery pass corrects it, to get the fsck exit status correct: |
| 635 | */ |
| 636 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { |
| 637 | /* nothing */ |
| 638 | } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { |
| 639 | set_bit(BCH_FS_errors_fixed, &c->flags); |
| 640 | } else { |
| 641 | set_bit(BCH_FS_errors_not_fixed, &c->flags); |
| 642 | set_bit(BCH_FS_error, &c->flags); |
| 643 | } |
| 644 | |
| 645 | if (action != action_orig) |
| 646 | kfree(action); |
| 647 | printbuf_exit(&buf); |
| 648 | |
| 649 | BUG_ON(!ret); |
| 650 | return ret; |
| 651 | } |
| 652 | |
/*
 * Names of the bkey validation contexts, generated by stringifying each entry
 * of the BKEY_VALIDATE_CONTEXTS() x-macro, followed by a NULL terminator.
 * Indexed by bkey_validate_context.from in __bch2_bkey_fsck_err().
 */
static const char * const bch2_bkey_validate_contexts[] = {
#define x(n) #n,
	BKEY_VALIDATE_CONTEXTS()
#undef x
	NULL
};
| 659 | |
| 660 | int __bch2_bkey_fsck_err(struct bch_fs *c, |
| 661 | struct bkey_s_c k, |
| 662 | struct bkey_validate_context from, |
| 663 | enum bch_sb_error_id err, |
| 664 | const char *fmt, ...) |
| 665 | { |
| 666 | if (from.flags & BCH_VALIDATE_silent) |
| 667 | return bch_err_throw(c, fsck_delete_bkey); |
| 668 | |
| 669 | unsigned fsck_flags = 0; |
| 670 | if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { |
| 671 | if (test_bit(err, c->sb.errors_silent)) |
| 672 | return bch_err_throw(c, fsck_delete_bkey); |
| 673 | |
| 674 | fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; |
| 675 | } |
| 676 | if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) |
| 677 | fsck_flags |= fsck_flags_extra[err]; |
| 678 | |
| 679 | struct printbuf buf = PRINTBUF; |
| 680 | prt_printf(&buf, "invalid bkey in %s", |
| 681 | bch2_bkey_validate_contexts[from.from]); |
| 682 | |
| 683 | if (from.from == BKEY_VALIDATE_journal) |
| 684 | prt_printf(&buf, " journal seq=%llu offset=%u", |
| 685 | from.journal_seq, from.journal_offset); |
| 686 | |
| 687 | prt_str(&buf, " btree="); |
| 688 | bch2_btree_id_to_text(&buf, from.btree); |
| 689 | prt_printf(&buf, " level=%u: ", from.level); |
| 690 | |
| 691 | bch2_bkey_val_to_text(&buf, c, k); |
| 692 | prt_newline(&buf); |
| 693 | |
| 694 | va_list args; |
| 695 | va_start(args, fmt); |
| 696 | prt_vprintf(&buf, fmt, args); |
| 697 | va_end(args); |
| 698 | |
| 699 | int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); |
| 700 | printbuf_exit(&buf); |
| 701 | return ret; |
| 702 | } |
| 703 | |
| 704 | static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) |
| 705 | { |
| 706 | struct fsck_err_state *s, *n; |
| 707 | |
| 708 | mutex_lock(&c->fsck_error_msgs_lock); |
| 709 | |
| 710 | list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { |
| 711 | if (print && s->ratelimited && s->last_msg) |
| 712 | bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); |
| 713 | |
| 714 | list_del(&s->list); |
| 715 | kfree(s->last_msg); |
| 716 | kfree(s); |
| 717 | } |
| 718 | |
| 719 | mutex_unlock(&c->fsck_error_msgs_lock); |
| 720 | } |
| 721 | |
/*
 * Free all recorded fsck error state, printing the summary line for eligible
 * entries first (see __bch2_flush_fsck_errs()).
 */
void bch2_flush_fsck_errs(struct bch_fs *c)
{
	__bch2_flush_fsck_errs(c, true);
}
| 726 | |
/* Free all recorded fsck error state without printing anything. */
void bch2_free_fsck_errs(struct bch_fs *c)
{
	__bch2_flush_fsck_errs(c, false);
}
| 731 | |
| 732 | int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, |
| 733 | subvol_inum inum, u64 offset) |
| 734 | { |
| 735 | u32 restart_count = trans->restart_count; |
| 736 | int ret = 0; |
| 737 | |
| 738 | if (inum.subvol) { |
| 739 | ret = bch2_inum_to_path(trans, inum, out); |
| 740 | if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) |
| 741 | return ret; |
| 742 | } |
| 743 | if (!inum.subvol || ret) |
| 744 | prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); |
| 745 | prt_printf(out, " offset %llu: ", offset); |
| 746 | |
| 747 | return trans_was_restarted(trans, restart_count); |
| 748 | } |
| 749 | |
/*
 * Non-transactional wrapper: runs bch2_inum_offset_err_msg_trans() through
 * bch2_trans_do(), which supplies the `trans` used in the expression. The
 * result is discarded; @out receives whatever text was produced.
 */
void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out,
			      subvol_inum inum, u64 offset)
{
	bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset));
}
| 755 | |
| 756 | int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, |
| 757 | struct bpos pos) |
| 758 | { |
| 759 | int ret = bch2_inum_snapshot_to_path(trans, pos.inode, pos.snapshot, NULL, out); |
| 760 | if (ret) |
| 761 | return ret; |
| 762 | |
| 763 | prt_printf(out, " offset %llu: ", pos.offset << 8); |
| 764 | return 0; |
| 765 | } |
| 766 | |
/*
 * Non-transactional wrapper: runs bch2_inum_snap_offset_err_msg_trans()
 * through bch2_trans_do(), which supplies the `trans` used in the expression.
 * The result is discarded; @out receives whatever text was produced.
 */
void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
				   struct bpos pos)
{
	bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
}