1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_CHARDEV
5 #include "bcachefs_ioctl.h"
8 #include "disk_accounting.h"
12 #include "recovery_passes.h"
14 #include "sb-counters.h"
16 #include "thread_with_file.h"
18 #include <linux/cdev.h>
19 #include <linux/device.h>
21 #include <linux/ioctl.h>
22 #include <linux/major.h>
23 #include <linux/sched/task.h>
24 #include <linux/slab.h>
25 #include <linux/uaccess.h>
/*
 * Resolve the ioctl `dev` argument to a struct bch_dev.
 *
 * When BCH_BY_INDEX is set in flags, `dev` is a device index: values
 * >= c->sb.nr_devices yield ERR_PTR(-EINVAL), otherwise the device is
 * fetched with bch2_dev_tryget_noerror().  The visible code also treats
 * `dev` as a userspace pointer to a path string (copied with
 * strndup_user(), bounded by PATH_MAX) and resolves it with
 * bch2_dev_lookup().
 *
 * NOTE(review): this excerpt is not contiguous (the rest of the signature,
 * the braces and several statements are not shown); the conditions guarding
 * the second ERR_PTR(-EINVAL) and the ERR_CAST(path) return are presumably
 * "lookup failed" and "IS_ERR(path)" -- confirm against the full file.
 */
27 /* returns with ref on ca->ref */
28 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
33 if (flags & BCH_BY_INDEX) {
34 if (dev >= c->sb.nr_devices)
35 return ERR_PTR(-EINVAL);
37 ca = bch2_dev_tryget_noerror(c, dev);
39 return ERR_PTR(-EINVAL);
/* `dev` carries a userspace address here; copy the path into kernel memory. */
43 path = strndup_user((const char __user *)
44 (unsigned long) dev, PATH_MAX);
46 return ERR_CAST(path);
48 ca = bch2_dev_lookup(c, path);
/*
 * BCH_IOCTL_ASSEMBLE handler: open a filesystem from a user-supplied list
 * of device paths.
 *
 * Visible steps: copy the fixed-size argument from userspace, test
 * arg.flags / arg.pad, allocate an array of arg.nr_devs u64 user pointers
 * and an array of arg.nr_devs kernel strings, copy the pointer array from
 * user_arg->devs, duplicate each path with strndup_user(), then call
 * bch2_fs_open() with empty options.
 *
 * NOTE(review): the excerpt is not contiguous; the error returns, the
 * allocation-failure checks and the body of the final loop over devs[] are
 * not shown (the loop presumably frees the duplicated paths -- confirm).
 */
56 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
58 struct bch_ioctl_assemble arg;
60 u64 *user_devs = NULL;
65 if (copy_from_user(&arg, user_arg, sizeof(arg)))
68 if (arg.flags || arg.pad)
/* kmalloc_array()/kcalloc() take count and element size separately. */
71 user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
75 devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
77 if (copy_from_user(user_devs, user_arg->devs,
78 sizeof(u64) * arg.nr_devs))
81 for (i = 0; i < arg.nr_devs; i++) {
82 devs[i] = strndup_user((const char __user *)(unsigned long)
85 ret= PTR_ERR_OR_ZERO(devs[i]);
90 c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
91 ret = PTR_ERR_OR_ZERO(c);
96 for (i = 0; i < arg.nr_devs; i++)
/*
 * BCH_IOCTL_INCREMENTAL handler: register a single device, named by a
 * userspace path in arg.dev, via bch2_fs_open_incremental().
 *
 * Visible steps: copy the argument from userspace, test arg.flags /
 * arg.pad, duplicate the path with strndup_user() (bounded by PATH_MAX),
 * call bch2_fs_open_incremental() and log its error string with pr_err().
 *
 * NOTE(review): the excerpt is not contiguous; return statements and the
 * condition guarding pr_err() are not shown.  The pr_err() format string
 * has no trailing newline -- confirm whether that is intended.
 */
102 static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
104 struct bch_ioctl_incremental arg;
108 if (copy_from_user(&arg, user_arg, sizeof(arg)))
111 if (arg.flags || arg.pad)
114 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
115 ret = PTR_ERR_OR_ZERO(path);
119 err = bch2_fs_open_incremental(path);
123 pr_err("Could not register bcachefs devices: %s", err);
/*
 * Dispatch ioctls that are not bound to a particular filesystem
 * (bch2_chardev_ioctl() calls this when it has no struct bch_fs).
 *
 * Visible commands: BCH_IOCTL_ASSEMBLE, BCH_IOCTL_INCREMENTAL and
 * BCH_IOCTL_FSCK_OFFLINE.  A result stored in `ret` is passed through
 * bch2_err_class() before being returned.
 *
 * NOTE(review): the excerpt is not contiguous; the switch statement, the
 * default case and the final return are not shown.
 */
131 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
137 case BCH_IOCTL_ASSEMBLE:
138 return bch2_ioctl_assemble(arg);
139 case BCH_IOCTL_INCREMENTAL:
140 return bch2_ioctl_incremental(arg);
142 case BCH_IOCTL_FSCK_OFFLINE: {
143 ret = bch2_ioctl_fsck_offline(arg);
152 ret = bch2_err_class(ret);
156 static long bch2_ioctl_query_uuid(struct bch_fs *c,
157 struct bch_ioctl_query_uuid __user *user_arg)
159 return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
160 sizeof(c->sb.user_uuid));
/*
 * BCH_IOCTL_START handler: start the filesystem with bch2_fs_start().
 *
 * The caller is checked for CAP_SYS_ADMIN and arg.flags / arg.pad are
 * tested before the start call.
 *
 * NOTE(review): the excerpt is not contiguous; the values returned when
 * either check fails are not shown.
 */
164 static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
166 if (!capable(CAP_SYS_ADMIN))
169 if (arg.flags || arg.pad)
172 return bch2_fs_start(c);
/*
 * Handler called by bch2_fs_ioctl() to stop a filesystem; the caller is
 * checked for CAP_SYS_ADMIN.
 *
 * NOTE(review): only the signature and the capability check are visible in
 * this excerpt; the actual stop logic is not shown.
 */
175 static long bch2_ioctl_stop(struct bch_fs *c)
177 if (!capable(CAP_SYS_ADMIN))
/*
 * BCH_IOCTL_DISK_ADD handler: add the device whose path is given by the
 * userspace pointer arg.dev, using bch2_dev_add().
 *
 * Checks CAP_SYS_ADMIN, tests arg.flags / arg.pad, then copies the path
 * with strndup_user() (bounded by PATH_MAX).
 *
 * NOTE(review): the excerpt is not contiguous; error returns and the
 * release of `path` are not shown.
 */
185 static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
190 if (!capable(CAP_SYS_ADMIN))
193 if (arg.flags || arg.pad)
196 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
197 ret = PTR_ERR_OR_ZERO(path);
201 ret = bch2_dev_add(c, path);
/*
 * BCH_IOCTL_DISK_REMOVE handler: look the device up with
 * bch2_device_lookup() and pass it, together with arg.flags, to
 * bch2_dev_remove().
 *
 * Checks CAP_SYS_ADMIN and masks arg.flags against an allowed set that
 * includes the BCH_FORCE_IF_* flags listed below.
 *
 * NOTE(review): the excerpt is not contiguous; the end of the flag mask,
 * the rejection return and the lookup error check are not shown.
 */
208 static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
212 if (!capable(CAP_SYS_ADMIN))
215 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
216 BCH_FORCE_IF_METADATA_LOST|
217 BCH_FORCE_IF_DEGRADED|
222 ca = bch2_device_lookup(c, arg.dev, arg.flags);
226 return bch2_dev_remove(c, ca, arg.flags);
/*
 * BCH_IOCTL_DISK_ONLINE handler: bring the device whose path is given by
 * the userspace pointer arg.dev online with bch2_dev_online().
 *
 * Checks CAP_SYS_ADMIN, tests arg.flags / arg.pad, then copies the path
 * with strndup_user() (bounded by PATH_MAX).
 *
 * NOTE(review): the excerpt is not contiguous; error returns and the
 * release of `path` are not shown.
 */
229 static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
234 if (!capable(CAP_SYS_ADMIN))
237 if (arg.flags || arg.pad)
240 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
241 ret = PTR_ERR_OR_ZERO(path);
245 ret = bch2_dev_online(c, path);
/*
 * BCH_IOCTL_DISK_OFFLINE handler: look the device up with
 * bch2_device_lookup() and take it offline with bch2_dev_offline(),
 * forwarding arg.flags.
 *
 * Checks CAP_SYS_ADMIN and masks arg.flags against an allowed set that
 * includes the BCH_FORCE_IF_* flags listed below.
 *
 * NOTE(review): the excerpt is not contiguous; the end of the flag mask,
 * the error returns and whatever happens to `ca` after the call are not
 * shown.
 */
250 static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
255 if (!capable(CAP_SYS_ADMIN))
258 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
259 BCH_FORCE_IF_METADATA_LOST|
260 BCH_FORCE_IF_DEGRADED|
265 ca = bch2_device_lookup(c, arg.dev, arg.flags);
269 ret = bch2_dev_offline(c, ca, arg.flags);
/*
 * BCH_IOCTL_DISK_SET_STATE handler: change a member device's state with
 * bch2_dev_set_state(c, ca, arg.new_state, arg.flags).
 *
 * Checks CAP_SYS_ADMIN, then validates the argument: flags outside the
 * allowed mask, any non-zero pad byte, or new_state >=
 * BCH_MEMBER_STATE_NR fail the validation.  An error from the state change
 * is logged with bch_err().
 *
 * NOTE(review): the excerpt is not contiguous; the end of the flag mask,
 * the return values and the condition guarding bch_err() are not shown.
 */
274 static long bch2_ioctl_disk_set_state(struct bch_fs *c,
275 struct bch_ioctl_disk_set_state arg)
280 if (!capable(CAP_SYS_ADMIN))
283 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
284 BCH_FORCE_IF_METADATA_LOST|
285 BCH_FORCE_IF_DEGRADED|
287 arg.pad[0] || arg.pad[1] || arg.pad[2] ||
288 arg.new_state >= BCH_MEMBER_STATE_NR)
291 ca = bch2_device_lookup(c, arg.dev, arg.flags);
295 ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
297 bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
/*
 * Per-job context for a data job started through the data ioctl.
 *
 * NOTE(review): the excerpt is not contiguous; other code in this file
 * reads ctx->c, so at least one member (and the closing brace) is not
 * shown here.
 */
303 struct bch_data_ctx {
/* Embedded so container_of() can recover the context from the thr pointer. */
304 struct thread_with_file thr;
/* The ioctl argument that is handed to bch2_data_job(). */
307 struct bch_ioctl_data arg;
/* Progress counters and final status, read by bch2_data_job_read(). */
308 struct bch_move_stats stats;
/*
 * Thread function for a data job: runs bch2_data_job() and records the
 * outcome.
 *
 * The job's return value is stored in ctx->thr.ret.  ctx->stats.ret is set
 * to BCH_IOCTL_DATA_EVENT_RET_device_offline when the job returned
 * -BCH_ERR_device_offline, and to BCH_IOCTL_DATA_EVENT_RET_done by the
 * other visible assignment.  ctx->stats.data_type is set to the "done"
 * marker and the BCH_WRITE_REF_ioctl_data reference on c->writes is
 * dropped.
 *
 * NOTE(review): the excerpt is not contiguous; the `else` between the two
 * stats.ret assignments (presumed) and the return statement are not shown.
 */
311 static int bch2_data_thread(void *arg)
313 struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
315 ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
316 if (ctx->thr.ret == -BCH_ERR_device_offline)
317 ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline;
319 ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_done;
320 ctx->stats.data_type = (int) DATA_PROGRESS_DATA_TYPE_done;
322 enumerated_ref_put(&ctx->c->writes, BCH_WRITE_REF_ioctl_data);
/*
 * .release handler for data-job files: recovers the job context from
 * file->private_data and calls bch2_thread_with_file_exit() on its thread.
 *
 * NOTE(review): the excerpt is not contiguous; anything after the exit
 * call (e.g. freeing the context, the return value) is not shown.
 */
326 static int bch2_data_job_release(struct inode *inode, struct file *file)
328 struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
330 bch2_thread_with_file_exit(&ctx->thr);
/*
 * .read handler for data-job files: report the job's current progress as a
 * struct bch_ioctl_data_event.
 *
 * The event is a BCH_DATA_EVENT_PROGRESS record filled from ctx->stats
 * (status, data type, btree id, position and the sector counters).  For a
 * scrub job (BCH_DATA_OP_scrub), sectors_total is the sum of the device's
 * per-data-type sector counts, starting at BCH_DATA_btree, for the types
 * selected in ctx->arg.scrub.data_types.  The other visible assignment
 * sets sectors_total to the filesystem's `used` figure.
 *
 * Returns sizeof(e) when the copy to userspace succeeds, otherwise the
 * error from copy_to_user_errcode().
 *
 * NOTE(review): the excerpt is not contiguous; the check of `len`, the
 * NULL check / release of `ca` and the `else` that selects the
 * filesystem-wide total are not shown.
 */
335 static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
336 size_t len, loff_t *ppos)
338 struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
339 struct bch_fs *c = ctx->c;
340 struct bch_ioctl_data_event e = {
341 .type = BCH_DATA_EVENT_PROGRESS,
342 .ret = ctx->stats.ret,
343 .p.data_type = ctx->stats.data_type,
344 .p.btree_id = ctx->stats.pos.btree,
345 .p.pos = ctx->stats.pos.pos,
346 .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
347 .p.sectors_error_corrected = atomic64_read(&ctx->stats.sectors_error_corrected),
348 .p.sectors_error_uncorrected = atomic64_read(&ctx->stats.sectors_error_uncorrected),
351 if (ctx->arg.op == BCH_DATA_OP_scrub) {
352 struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev);
354 struct bch_dev_usage_full u;
355 bch2_dev_usage_full_read_fast(ca, &u);
/* Only data types whose bit is set in the scrub mask count toward the total. */
356 for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++)
357 if (ctx->arg.scrub.data_types & BIT(i))
358 e.p.sectors_total += u.d[i].sectors;
362 e.p.sectors_total = bch2_fs_usage_read_short(c).used;
/* `?:` yields the copy error if non-zero, else the number of bytes reported. */
368 return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
371 static const struct file_operations bcachefs_data_ops = {
372 .release = bch2_data_job_release,
373 .read = bch2_data_job_read,
/*
 * BCH_IOCTL_DATA handler: start a data job in its own thread via
 * bch2_run_thread_with_file().
 *
 * Visible steps: try to take a BCH_WRITE_REF_ioctl_data reference on
 * c->writes, check CAP_SYS_ADMIN, validate arg.op (< BCH_DATA_OP_NR) and
 * arg.flags (must be zero to pass), allocate a zeroed struct bch_data_ctx
 * and launch the thread.  bch2_data_thread() drops the same write
 * reference when the job finishes.
 *
 * NOTE(review): the excerpt is not contiguous; the error codes, the
 * initialisation of ctx, the remaining bch2_run_thread_with_file()
 * arguments and the condition under which the trailing
 * enumerated_ref_put() runs (presumably the error path) are not shown.
 */
376 static long bch2_ioctl_data(struct bch_fs *c,
377 struct bch_ioctl_data arg)
379 struct bch_data_ctx *ctx;
382 if (!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_ioctl_data))
385 if (!capable(CAP_SYS_ADMIN)) {
390 if (arg.op >= BCH_DATA_OP_NR || arg.flags) {
395 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
404 ret = bch2_run_thread_with_file(&ctx->thr,
413 enumerated_ref_put(&c->writes, BCH_WRITE_REF_ioctl_data);
/*
 * BCH_IOCTL_FS_USAGE handler: report filesystem-wide usage plus the
 * replicas usage entries.
 *
 * Visible steps: test BCH_FS_started, read the caller's
 * replica_entries_bytes with get_user(), read the replicas usage into a
 * darray, fail with -ERANGE if the caller's buffer is smaller than
 * replicas.nr, and copy the entries to user_arg->replicas.  The fixed part
 * of the reply then gets capacity, online_reserved, the actual
 * replica_entries_bytes and one persistent_reserved counter per replica
 * count below BCH_REPLICAS_MAX, and is copied back to user_arg.  The
 * darray is released with darray_exit().
 *
 * NOTE(review): the excerpt is not contiguous; the error returns, the use
 * made of the local `u` and any labels are not shown.
 */
417 static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
418 struct bch_ioctl_fs_usage __user *user_arg)
420 struct bch_ioctl_fs_usage arg = {};
421 darray_char replicas = {};
422 u32 replica_entries_bytes;
425 if (!test_bit(BCH_FS_started, &c->flags))
428 if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
/* Chained `?:` stops at the first non-zero (error) result. */
431 ret = bch2_fs_replicas_usage_read(c, &replicas) ?:
432 (replica_entries_bytes < replicas.nr ? -ERANGE : 0) ?:
433 copy_to_user_errcode(&user_arg->replicas, replicas.data, replicas.nr);
437 struct bch_fs_usage_short u = bch2_fs_usage_read_short(c);
438 arg.capacity = c->capacity;
440 arg.online_reserved = percpu_u64_get(c->online_reserved);
441 arg.replica_entries_bytes = replicas.nr;
443 for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
444 struct disk_accounting_pos k;
445 disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i);
447 bch2_accounting_mem_read(c,
448 disk_accounting_pos_to_bpos(&k),
449 &arg.persistent_reserved[i], 1);
452 ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
454 darray_exit(&replicas);
/*
 * BCH_IOCTL_QUERY_ACCOUNTING handler: return accounting entries selected
 * by arg.accounting_types_mask together with summary counters.
 *
 * Visible steps: test BCH_FS_started; copy the argument from userspace;
 * read the accounting into a darray; fail with -ERANGE when the caller's
 * buffer (arg.accounting_u64s u64s) is smaller than accounting.nr bytes;
 * copy the entries to user_arg->accounting.  The reply header then gets
 * capacity, used, online_reserved and the number of u64s actually
 * produced, and is copied back.  The darray is released with
 * darray_exit().
 *
 * NOTE(review): the excerpt is not contiguous; the error returns and any
 * labels are not shown.
 */
458 static long bch2_ioctl_query_accounting(struct bch_fs *c,
459 struct bch_ioctl_query_accounting __user *user_arg)
461 struct bch_ioctl_query_accounting arg;
462 darray_char accounting = {};
465 if (!test_bit(BCH_FS_started, &c->flags))
/* Chained `?:` stops at the first non-zero (error) result. */
468 ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?:
469 bch2_fs_accounting_read(c, &accounting, arg.accounting_types_mask) ?:
470 (arg.accounting_u64s * sizeof(u64) < accounting.nr ? -ERANGE : 0) ?:
471 copy_to_user_errcode(&user_arg->accounting, accounting.data, accounting.nr);
475 arg.capacity = c->capacity;
476 arg.used = bch2_fs_usage_read_short(c).used;
477 arg.online_reserved = percpu_u64_get(c->online_reserved);
478 arg.accounting_u64s = accounting.nr / sizeof(u64);
480 ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
482 darray_exit(&accounting);
/*
 * BCH_IOCTL_DEV_USAGE handler: report per-device usage in the fixed-size
 * struct bch_ioctl_dev_usage.
 *
 * Visible steps: test BCH_FS_started, copy the argument from userspace,
 * validate flags (only BCH_BY_INDEX passes the visible mask), look up the
 * device, read its usage with bch2_dev_usage_full_read(), then fill in
 * state, bucket_size, nr_buckets (nbuckets - first_bucket) and the
 * buckets / sectors / fragmented counters for each of the ARRAY_SIZE(arg.d)
 * slots before copying the struct back.
 *
 * NOTE(review): the excerpt is not contiguous; the rest of the validation
 * expression, the error returns and the release of `ca` are not shown.
 */
486 /* obsolete, didn't allow for new data types: */
487 static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c,
488 struct bch_ioctl_dev_usage __user *user_arg)
490 struct bch_ioctl_dev_usage arg;
491 struct bch_dev_usage_full src;
495 if (!test_bit(BCH_FS_started, &c->flags))
498 if (copy_from_user(&arg, user_arg, sizeof(arg)))
501 if ((arg.flags & ~BCH_BY_INDEX) ||
507 ca = bch2_device_lookup(c, arg.dev, arg.flags);
511 src = bch2_dev_usage_full_read(ca);
513 arg.state = ca->mi.state;
514 arg.bucket_size = ca->mi.bucket_size;
515 arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
517 for (i = 0; i < ARRAY_SIZE(arg.d); i++) {
518 arg.d[i].buckets = src.d[i].buckets;
519 arg.d[i].sectors = src.d[i].sectors;
520 arg.d[i].fragmented = src.d[i].fragmented;
525 return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
/*
 * BCH_IOCTL_DEV_USAGE_V2 handler: report per-device usage with a
 * caller-sized array of per-data-type entries.
 *
 * Visible steps: test BCH_FS_started, copy the argument from userspace,
 * validate flags (only BCH_BY_INDEX passes the visible mask), look up the
 * device and read its usage.  The header gets state, bucket_size,
 * nr_buckets (nbuckets - first_bucket) and nr_data_types clamped to
 * BCH_DATA_NR, and is copied back first; then one struct
 * bch_ioctl_dev_usage_type per data type is copied to user_arg->d[i].
 *
 * NOTE(review): the excerpt is not contiguous; the rest of the validation
 * expression, the error handling between copies and the release of `ca`
 * are not shown.
 */
528 static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
529 struct bch_ioctl_dev_usage_v2 __user *user_arg)
531 struct bch_ioctl_dev_usage_v2 arg;
532 struct bch_dev_usage_full src;
536 if (!test_bit(BCH_FS_started, &c->flags))
539 if (copy_from_user(&arg, user_arg, sizeof(arg)))
542 if ((arg.flags & ~BCH_BY_INDEX) ||
548 ca = bch2_device_lookup(c, arg.dev, arg.flags);
552 src = bch2_dev_usage_full_read(ca);
554 arg.state = ca->mi.state;
555 arg.bucket_size = ca->mi.bucket_size;
/* Never report more entries than the kernel knows about. */
556 arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
557 arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
559 ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
563 for (unsigned i = 0; i < arg.nr_data_types; i++) {
564 struct bch_ioctl_dev_usage_type t = {
565 .buckets = src.d[i].buckets,
566 .sectors = src.d[i].sectors,
567 .fragmented = src.d[i].fragmented,
570 ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
/*
 * BCH_IOCTL_READ_SUPER handler: copy a superblock to the user buffer at
 * arg.sb.
 *
 * Visible steps: check CAP_SYS_ADMIN, validate flags (only BCH_BY_INDEX
 * and BCH_READ_DEV pass the visible mask), take c->sb_lock, look up the
 * device when BCH_READ_DEV is set, compare vstruct_bytes(sb) against
 * arg.size, copy the superblock out and drop the lock.
 *
 * NOTE(review): the excerpt is not contiguous; how `sb` is chosen, the
 * error codes, the copy length and the release of `ca` are not shown.
 */
579 static long bch2_ioctl_read_super(struct bch_fs *c,
580 struct bch_ioctl_read_super arg)
582 struct bch_dev *ca = NULL;
586 if (!capable(CAP_SYS_ADMIN))
589 if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
593 mutex_lock(&c->sb_lock);
595 if (arg.flags & BCH_READ_DEV) {
596 ca = bch2_device_lookup(c, arg.dev, arg.flags);
597 ret = PTR_ERR_OR_ZERO(ca);
/* The caller's buffer must be large enough for the whole superblock. */
606 if (vstruct_bytes(sb) > arg.size) {
611 ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
616 mutex_unlock(&c->sb_lock);
/*
 * BCH_IOCTL_DISK_GET_IDX handler: find a member device from a device
 * number.
 *
 * arg.dev is decoded with huge_decode_dev(), CAP_SYS_ADMIN is checked, and
 * the online members are iterated with for_each_online_member_rcu().  The
 * visible fall-through return is the ENOENT_dev_idx_not_found error.
 *
 * NOTE(review): the excerpt is not contiguous; the loop body (presumably
 * the comparison against `dev` and the successful return) is not shown.
 */
620 static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
621 struct bch_ioctl_disk_get_idx arg)
623 dev_t dev = huge_decode_dev(arg.dev);
625 if (!capable(CAP_SYS_ADMIN))
632 for_each_online_member_rcu(c, ca)
636 return bch_err_throw(c, ENOENT_dev_idx_not_found);
/*
 * BCH_IOCTL_DISK_RESIZE handler: resize a member device to arg.nbuckets
 * buckets with bch2_dev_resize().
 *
 * Checks CAP_SYS_ADMIN, validates flags (only BCH_BY_INDEX passes the
 * visible mask) and looks the device up with bch2_device_lookup().
 *
 * NOTE(review): the excerpt is not contiguous; the rest of the validation
 * expression, the error returns and the release of `ca` are not shown.
 */
639 static long bch2_ioctl_disk_resize(struct bch_fs *c,
640 struct bch_ioctl_disk_resize arg)
645 if (!capable(CAP_SYS_ADMIN))
648 if ((arg.flags & ~BCH_BY_INDEX) ||
652 ca = bch2_device_lookup(c, arg.dev, arg.flags);
656 ret = bch2_dev_resize(c, ca, arg.nbuckets);
/*
 * BCH_IOCTL_DISK_RESIZE_JOURNAL handler: set the number of journal buckets
 * on a member device with bch2_set_nr_journal_buckets().
 *
 * Checks CAP_SYS_ADMIN, validates flags (only BCH_BY_INDEX passes the
 * visible mask), tests arg.nbuckets against U32_MAX and looks the device
 * up with bch2_device_lookup().
 *
 * NOTE(review): the excerpt is not contiguous; the rest of the validation
 * expression, the error returns and the release of `ca` are not shown.
 */
662 static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
663 struct bch_ioctl_disk_resize_journal arg)
668 if (!capable(CAP_SYS_ADMIN))
671 if ((arg.flags & ~BCH_BY_INDEX) ||
675 if (arg.nbuckets > U32_MAX)
678 ca = bch2_device_lookup(c, arg.dev, arg.flags);
682 ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
/*
 * BCH_IOCTL(_name, _argtype): helper used inside bch2_fs_ioctl()'s switch.
 * It copies the user argument into a local `i` with copy_from_user() and
 * stores the result of bch2_ioctl_<_name>(c, i) in `ret`.
 *
 * NOTE(review): the excerpt is not contiguous; the declaration of `i`
 * (presumably of type _argtype), the copy-failure handling and the end of
 * the macro are not shown.  The macro relies on `c`, `arg` and `ret` being
 * in scope at the expansion site.
 */
688 #define BCH_IOCTL(_name, _argtype) \
692 if (copy_from_user(&i, arg, sizeof(i))) \
694 ret = bch2_ioctl_##_name(c, i); \
/*
 * Dispatch an ioctl issued against a specific filesystem.
 *
 * Commands whose handlers take the raw user pointer (QUERY_UUID, FS_USAGE,
 * DEV_USAGE, DEV_USAGE_V2, QUERY_ACCOUNTING, QUERY_COUNTERS) are called
 * directly; the others go through BCH_IOCTL(), which first copies the
 * argument struct from userspace.  A BCH_FS_started test sits between the
 * first group of commands and the device-management commands.  A result
 * stored in `ret` is passed through bch2_err_class().
 *
 * NOTE(review): the excerpt is not contiguous; the switch statements, some
 * case labels (e.g. for the stop and data commands), the action taken when
 * BCH_FS_started is clear, the default case and the final return are not
 * shown.
 */
698 long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
703 case BCH_IOCTL_QUERY_UUID:
704 return bch2_ioctl_query_uuid(c, arg);
705 case BCH_IOCTL_FS_USAGE:
706 return bch2_ioctl_fs_usage(c, arg);
707 case BCH_IOCTL_DEV_USAGE:
708 return bch2_ioctl_dev_usage(c, arg);
709 case BCH_IOCTL_DEV_USAGE_V2:
710 return bch2_ioctl_dev_usage_v2(c, arg);
712 case BCH_IOCTL_START:
713 BCH_IOCTL(start, struct bch_ioctl_start);
715 return bch2_ioctl_stop(c);
717 case BCH_IOCTL_READ_SUPER:
718 BCH_IOCTL(read_super, struct bch_ioctl_read_super);
719 case BCH_IOCTL_DISK_GET_IDX:
720 BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
723 if (!test_bit(BCH_FS_started, &c->flags))
727 case BCH_IOCTL_DISK_ADD:
728 BCH_IOCTL(disk_add, struct bch_ioctl_disk);
729 case BCH_IOCTL_DISK_REMOVE:
730 BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
731 case BCH_IOCTL_DISK_ONLINE:
732 BCH_IOCTL(disk_online, struct bch_ioctl_disk);
733 case BCH_IOCTL_DISK_OFFLINE:
734 BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
735 case BCH_IOCTL_DISK_SET_STATE:
736 BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
738 BCH_IOCTL(data, struct bch_ioctl_data);
739 case BCH_IOCTL_DISK_RESIZE:
740 BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
741 case BCH_IOCTL_DISK_RESIZE_JOURNAL:
742 BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
743 case BCH_IOCTL_FSCK_ONLINE:
744 BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
745 case BCH_IOCTL_QUERY_ACCOUNTING:
746 return bch2_ioctl_query_accounting(c, arg);
747 case BCH_IOCTL_QUERY_COUNTERS:
748 return bch2_ioctl_query_counters(c, arg);
754 ret = bch2_err_class(ret);
/*
 * Maps a per-filesystem chardev minor number to its struct bch_fs:
 * entries are added in bch2_fs_chardev_init(), removed in
 * bch2_fs_chardev_exit() and looked up in bch2_chardev_ioctl().
 */
758 static DEFINE_IDR(bch_chardev_minor);
/*
 * unlocked_ioctl entry point for the bcachefs control character devices.
 *
 * The minor number of the opened inode selects the target: minors below
 * U8_MAX are looked up in bch_chardev_minor to find a filesystem, any
 * other minor yields c == NULL.  The request is then routed to
 * bch2_fs_ioctl() or bch2_global_ioctl().
 *
 * NOTE(review): the excerpt is not contiguous; the head of the conditional
 * expression (presumably `return c`) is not shown.
 */
760 static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
762 unsigned minor = iminor(file_inode(filp));
763 struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
764 void __user *arg = (void __user *) v;
767 ? bch2_fs_ioctl(c, cmd, arg)
768 : bch2_global_ioctl(cmd, arg);
771 static const struct file_operations bch_chardev_fops = {
772 .owner = THIS_MODULE,
773 .unlocked_ioctl = bch2_chardev_ioctl,
774 .open = nonseekable_open,
/* Major number returned by register_chrdev() in bch2_chardev_init(). */
777 static int bch_chardev_major;
/*
 * Device class under which both the global and the per-filesystem control
 * devices are created.
 * NOTE(review): the initializer's fields are not visible in this excerpt.
 */
778 static const struct class bch_chardev_class = {
/* The global control device, created at minor U8_MAX by bch2_chardev_init(). */
781 static struct device *bch_chardev;
/*
 * Tear down a filesystem's control device: unregister c->chardev if it is
 * a valid pointer and remove c->minor from the bch_chardev_minor IDR.
 *
 * NOTE(review): the excerpt is not contiguous; a line directly before the
 * idr_remove() call is not shown, so that call may be conditional --
 * confirm against the full file.
 */
783 void bch2_fs_chardev_exit(struct bch_fs *c)
/* c->chardev may be NULL or an ERR_PTR if bch2_fs_chardev_init() did not complete. */
785 if (!IS_ERR_OR_NULL(c->chardev))
786 device_unregister(c->chardev);
788 idr_remove(&bch_chardev_minor, c->minor);
/*
 * Create the per-filesystem control device "bcachefs<minor>-ctl".
 *
 * A minor number is allocated from bch_chardev_minor (which also records
 * the minor -> filesystem mapping), then the device is created in
 * bch_chardev_class.  A device_create() failure is returned as
 * PTR_ERR(c->chardev).
 *
 * NOTE(review): the excerpt is not contiguous; the check of the
 * idr_alloc() result and the success return are not shown.
 * NOTE(review): idr_alloc() is called with end == 0, while
 * bch2_chardev_ioctl() only looks up minors below U8_MAX and U8_MAX itself
 * is used for the global device -- confirm whether the allocation range
 * should be bounded.
 */
791 int bch2_fs_chardev_init(struct bch_fs *c)
793 c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
797 c->chardev = device_create(&bch_chardev_class, NULL,
798 MKDEV(bch_chardev_major, c->minor), c,
799 "bcachefs%u-ctl", c->minor);
800 if (IS_ERR(c->chardev))
801 return PTR_ERR(c->chardev);
806 void bch2_chardev_exit(void)
808 device_destroy(&bch_chardev_class, MKDEV(bch_chardev_major, U8_MAX));
809 class_unregister(&bch_chardev_class);
810 if (bch_chardev_major > 0)
811 unregister_chrdev(bch_chardev_major, "bcachefs");
/*
 * Set up the global bcachefs control device.
 *
 * register_chrdev() is called with major 0 and the name "bcachefs-ctl";
 * its return value is stored in bch_chardev_major and returned directly
 * when negative.  The device class is then registered and the global
 * device "bcachefs-ctl" is created at minor U8_MAX.  The visible unwind
 * code unregisters the class and the chrdev.
 *
 * NOTE(review): the excerpt is not contiguous; the check of the
 * class_register() result, the labels of the unwind path and the return
 * statements are not shown.
 */
814 int __init bch2_chardev_init(void)
818 bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
819 if (bch_chardev_major < 0)
820 return bch_chardev_major;
822 ret = class_register(&bch_chardev_class);
826 bch_chardev = device_create(&bch_chardev_class, NULL,
827 MKDEV(bch_chardev_major, U8_MAX),
828 NULL, "bcachefs-ctl");
829 if (IS_ERR(bch_chardev)) {
830 ret = PTR_ERR(bch_chardev);
837 class_unregister(&bch_chardev_class);
839 unregister_chrdev(bch_chardev_major, "bcachefs-ctl");
843 #endif /* NO_BCACHEFS_CHARDEV */