bcachefs: Make bkey types globally unique
[linux-block.git] / fs / bcachefs / sysfs.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * bcache sysfs interfaces
4  *
5  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
6  * Copyright 2012 Google, Inc.
7  */
8
9 #ifndef NO_BCACHEFS_SYSFS
10
11 #include "bcachefs.h"
12 #include "alloc_background.h"
13 #include "sysfs.h"
14 #include "btree_cache.h"
15 #include "btree_io.h"
16 #include "btree_iter.h"
17 #include "btree_update.h"
18 #include "btree_update_interior.h"
19 #include "btree_gc.h"
20 #include "buckets.h"
21 #include "disk_groups.h"
22 #include "ec.h"
23 #include "inode.h"
24 #include "journal.h"
25 #include "keylist.h"
26 #include "move.h"
27 #include "opts.h"
28 #include "rebalance.h"
29 #include "replicas.h"
30 #include "super-io.h"
31 #include "tests.h"
32
33 #include <linux/blkdev.h>
34 #include <linux/sort.h>
35 #include <linux/sched/clock.h>
36
37 #include "util.h"
38
/*
 * Define the struct sysfs_ops instance <type>_sysfs_ops, wired to the
 * <type>_show and <type>_store handlers.
 */
39 #define SYSFS_OPS(type)                                                 \
40 struct sysfs_ops type ## _sysfs_ops = {                                 \
41         .show   = type ## _show,                                        \
42         .store  = type ## _store                                        \
43 }
44
/*
 * Expands to the signature of a static <fn>_show() handler; the function
 * body follows the macro invocation. The parameter names (kobj, attr, buf)
 * are fixed here because the sysfs_print*() helper macros refer to them.
 */
45 #define SHOW(fn)                                                        \
46 static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\
47                            char *buf)                                   \
48
/*
 * Expands to the signature of a static <fn>_store() handler; the function
 * body follows the macro invocation. The parameter names (kobj, attr, buf,
 * size) are fixed here because the sysfs_strtoul*() helpers refer to them.
 */
49 #define STORE(fn)                                                       \
50 static ssize_t fn ## _store(struct kobject *kobj, struct attribute *attr,\
51                             const char *buf, size_t size)               \
52
/*
 * Define a file-local struct attribute called sysfs_<_name> whose sysfs
 * file name is the stringified _name and whose permissions are _mode.
 */
53 #define __sysfs_attribute(_name, _mode)                                 \
54         static struct attribute sysfs_##_name =                         \
55                 { .name = #_name, .mode = _mode }
56
/* Shorthands: owner-writable only, world-readable only, or both. */
57 #define write_attribute(n)      __sysfs_attribute(n, S_IWUSR)
58 #define read_attribute(n)       __sysfs_attribute(n, S_IRUGO)
59 #define rw_attribute(n)         __sysfs_attribute(n, S_IRUGO|S_IWUSR)
60
/*
 * Show-side helpers. Each one is meant to be used inside a SHOW() body: it
 * compares the handler's `attr` against &sysfs_<file> and, on a match,
 * formats into `buf` and *returns from the enclosing function*.
 */

/* printf-style formatting, bounded to PAGE_SIZE; a trailing "\n" is appended. */
61 #define sysfs_printf(file, fmt, ...)                                    \
62 do {                                                                    \
63         if (attr == &sysfs_ ## file)                                    \
64                 return scnprintf(buf, PAGE_SIZE, fmt "\n", __VA_ARGS__);\
65 } while (0)
66
/* Print a single value via snprint() (not defined in this file). */
67 #define sysfs_print(file, var)                                          \
68 do {                                                                    \
69         if (attr == &sysfs_ ## file)                                    \
70                 return snprint(buf, PAGE_SIZE, var);                    \
71 } while (0)
72
/*
 * Print a value via bch2_hprint() and append a newline; the returned length
 * is bch2_hprint()'s result plus one for the "\n".
 * NOTE(review): the strcat() is not bounded by PAGE_SIZE - presumably
 * bch2_hprint() output is always short; confirm.
 */
73 #define sysfs_hprint(file, val)                                         \
74 do {                                                                    \
75         if (attr == &sysfs_ ## file) {                                  \
76                 ssize_t ret = bch2_hprint(buf, val);                    \
77                 strcat(buf, "\n");                                      \
78                 return ret + 1;                                         \
79         }                                                               \
80 } while (0)
81
/*
 * Variants that obtain the value through a var() macro, which is not defined
 * in the visible part of this file - the user is expected to provide it.
 */
82 #define var_printf(_var, fmt)   sysfs_printf(_var, fmt, var(_var))
83 #define var_print(_var)         sysfs_print(_var, var(_var))
84 #define var_hprint(_var)        sysfs_hprint(_var, var(_var))
85
/*
 * Store-side helpers. The sysfs_*() forms are meant to be used inside a
 * STORE() body: they compare `attr` against &sysfs_<file> and, on a match,
 * parse `buf` into `var` and *return from the enclosing function* - the
 * parser's non-zero result on failure, otherwise `size`.
 */

/* Parse with strtoul_safe() (not defined in this file). */
86 #define sysfs_strtoul(file, var)                                        \
87 do {                                                                    \
88         if (attr == &sysfs_ ## file)                                    \
89                 return strtoul_safe(buf, var) ?: (ssize_t) size;        \
90 } while (0)
91
/* As sysfs_strtoul(), but min/max are forwarded to strtoul_safe_clamp(). */
92 #define sysfs_strtoul_clamp(file, var, min, max)                        \
93 do {                                                                    \
94         if (attr == &sysfs_ ## file)                                    \
95                 return strtoul_safe_clamp(buf, var, min, max)           \
96                         ?: (ssize_t) size;                              \
97 } while (0)
98
/*
 * Statement expression: evaluates to the base-10 unsigned long parsed from
 * cp by kstrtoul(), or returns kstrtoul()'s error from the enclosing function.
 */
99 #define strtoul_or_return(cp)                                           \
100 ({                                                                      \
101         unsigned long _v;                                               \
102         int _r = kstrtoul(cp, 10, &_v);                                 \
103         if (_r)                                                         \
104                 return _r;                                              \
105         _v;                                                             \
106 })
107
/*
 * As strtoul_or_return(), but parsing goes through strtoul_safe_restrict()
 * with the given min/max (helper not defined in this file).
 */
108 #define strtoul_restrict_or_return(cp, min, max)                        \
109 ({                                                                      \
110         unsigned long __v = 0;                                          \
111         int _r = strtoul_safe_restrict(cp, __v, min, max);              \
112         if (_r)                                                         \
113                 return _r;                                              \
114         __v;                                                            \
115 })
116
/*
 * Statement expression: evaluates to the u64 parsed from cp by strtoi_h(),
 * or returns strtoi_h()'s error from the enclosing function.
 */
117 #define strtoi_h_or_return(cp)                                          \
118 ({                                                                      \
119         u64 _v;                                                         \
120         int _r = strtoi_h(cp, &_v);                                     \
121         if (_r)                                                         \
122                 return _r;                                              \
123         _v;                                                             \
124 })
125
/* STORE() helper: parse buf into var with strtoi_h() when attr matches. */
126 #define sysfs_hatoi(file, var)                                          \
127 do {                                                                    \
128         if (attr == &sysfs_ ## file)                                    \
129                 return strtoi_h(buf, &var) ?: (ssize_t) size;           \
130 } while (0)
131
/*
 * Attribute objects. Each *_attribute(n) line defines a static
 * struct attribute named sysfs_<n>; the show/store handlers below dispatch
 * by comparing their `attr` argument against the address of these objects.
 */

/* Write-only action triggers, plus the periodic-gc toggle. */
132 write_attribute(trigger_journal_flush);
133 write_attribute(trigger_btree_coalesce);
134 write_attribute(trigger_gc);
135 write_attribute(prune_cache);
136 rw_attribute(btree_gc_periodic);
137
/* Read-only identity / geometry values. */
138 read_attribute(uuid);
139 read_attribute(minor);
140 read_attribute(bucket_size);
141 read_attribute(block_size);
142 read_attribute(btree_node_size);
143 read_attribute(first_bucket);
144 read_attribute(nbuckets);
145 read_attribute(durability);
146 read_attribute(iodone);
147
/* IO latency and congestion. */
148 read_attribute(io_latency_read);
149 read_attribute(io_latency_write);
150 read_attribute(io_latency_stats_read);
151 read_attribute(io_latency_stats_write);
152 read_attribute(congested);
153
/* Bucket quantile reports. */
154 read_attribute(bucket_quantiles_last_read);
155 read_attribute(bucket_quantiles_last_write);
156 read_attribute(bucket_quantiles_fragmentation);
157 read_attribute(bucket_quantiles_oldest_gen);
158
/* Debugging / statistics dumps. */
159 read_attribute(reserve_stats);
160 read_attribute(btree_cache_size);
161 read_attribute(compression_stats);
162 read_attribute(journal_debug);
163 read_attribute(journal_pins);
164 read_attribute(btree_updates);
165 read_attribute(dirty_btree_nodes);
166
167 read_attribute(internal_uuid);
168
169 read_attribute(has_data);
170 read_attribute(alloc_debug);
171 write_attribute(wake_allocator);
172
/* Event counters. */
173 read_attribute(read_realloc_races);
174 read_attribute(extent_migrate_done);
175 read_attribute(extent_migrate_raced);
176
/* Journal timing tunables. */
177 rw_attribute(journal_write_delay_ms);
178 rw_attribute(journal_reclaim_delay_ms);
179
180 rw_attribute(discard);
181 rw_attribute(cache_replacement_policy);
182 rw_attribute(label);
183
/*
 * Copygc and rebalance controls; sysfs_pd_controller_attribute() is defined
 * outside this file.
 */
184 rw_attribute(copy_gc_enabled);
185 sysfs_pd_controller_attribute(copy_gc);
186
187 rw_attribute(rebalance_enabled);
188 sysfs_pd_controller_attribute(rebalance);
189 read_attribute(rebalance_work);
190 rw_attribute(promote_whole_extents);
191
192 read_attribute(new_stripes);
193
194 rw_attribute(pd_controllers_update_seconds);
195
196 read_attribute(meta_replicas_have);
197 read_attribute(data_replicas_have);
198
199 #ifdef CONFIG_BCACHEFS_TESTS
200 write_attribute(perf_test);
201 #endif /* CONFIG_BCACHEFS_TESTS */
202
/* One read/write attribute per entry of the BCH_DEBUG_PARAMS() list. */
203 #define BCH_DEBUG_PARAM(name, description)                              \
204         rw_attribute(name);
205
206         BCH_DEBUG_PARAMS()
207 #undef BCH_DEBUG_PARAM
208
/*
 * One read-only attribute per entry of the BCH_TIME_STATS() list. The object
 * is named sysfs_time_stat_<name>, but the file name is just <name>.
 */
209 #define x(_name)                                                \
210         static struct attribute sysfs_time_stat_##_name =               \
211                 { .name = #_name, .mode = S_IRUGO };
212         BCH_TIME_STATS()
213 #undef x
214
/* Read-only attribute whose file name ("state") differs from its C name. */
215 static struct attribute sysfs_state_rw = {
216         .name = "state",
217         .mode = S_IRUGO
218 };
219
220 static size_t bch2_btree_cache_size(struct bch_fs *c)
221 {
222         size_t ret = 0;
223         struct btree *b;
224
225         mutex_lock(&c->btree_cache.lock);
226         list_for_each_entry(b, &c->btree_cache.live, list)
227                 ret += btree_bytes(c);
228
229         mutex_unlock(&c->btree_cache.lock);
230         return ret;
231 }
232
233 static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
234 {
235         struct printbuf out = _PBUF(buf, PAGE_SIZE);
236         struct bch_fs_usage stats = bch2_fs_usage_read(c);
237         unsigned replicas, type;
238
239         pr_buf(&out, "capacity:\t\t%llu\n", c->capacity);
240
241         for (replicas = 0; replicas < ARRAY_SIZE(stats.replicas); replicas++) {
242                 pr_buf(&out, "%u replicas:\n", replicas + 1);
243
244                 for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
245                         pr_buf(&out, "\t%s:\t\t%llu\n",
246                                bch2_data_types[type],
247                                stats.replicas[replicas].data[type]);
248                 pr_buf(&out, "\terasure coded:\t%llu\n",
249                        stats.replicas[replicas].ec_data);
250                 pr_buf(&out, "\treserved:\t%llu\n",
251                        stats.replicas[replicas].persistent_reserved);
252         }
253
254         pr_buf(&out, "bucket usage\n");
255
256         for (type = BCH_DATA_SB; type < BCH_DATA_NR; type++)
257                 pr_buf(&out, "\t%s:\t\t%llu\n",
258                        bch2_data_types[type],
259                        stats.buckets[type]);
260
261         pr_buf(&out, "online reserved:\t%llu\n",
262                stats.online_reserved);
263
264         return out.pos - buf;
265 }
266
267 static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
268 {
269         struct btree_iter iter;
270         struct bkey_s_c k;
271         u64 nr_uncompressed_extents = 0, uncompressed_sectors = 0,
272             nr_compressed_extents = 0,
273             compressed_sectors_compressed = 0,
274             compressed_sectors_uncompressed = 0;
275
276         if (!bch2_fs_running(c))
277                 return -EPERM;
278
279         for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
280                 if (k.k->type == KEY_TYPE_extent) {
281                         struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
282                         const union bch_extent_entry *entry;
283                         struct extent_ptr_decoded p;
284
285                         extent_for_each_ptr_decode(e, p, entry) {
286                                 if (p.crc.compression_type == BCH_COMPRESSION_NONE) {
287                                         nr_uncompressed_extents++;
288                                         uncompressed_sectors += e.k->size;
289                                 } else {
290                                         nr_compressed_extents++;
291                                         compressed_sectors_compressed +=
292                                                 p.crc.compressed_size;
293                                         compressed_sectors_uncompressed +=
294                                                 p.crc.uncompressed_size;
295                                 }
296
297                                 /* only looking at the first ptr */
298                                 break;
299                         }
300                 }
301         bch2_btree_iter_unlock(&iter);
302
303         return scnprintf(buf, PAGE_SIZE,
304                         "uncompressed data:\n"
305                         "       nr extents:                     %llu\n"
306                         "       size (bytes):                   %llu\n"
307                         "compressed data:\n"
308                         "       nr extents:                     %llu\n"
309                         "       compressed size (bytes):        %llu\n"
310                         "       uncompressed size (bytes):      %llu\n",
311                         nr_uncompressed_extents,
312                         uncompressed_sectors << 9,
313                         nr_compressed_extents,
314                         compressed_sectors_compressed << 9,
315                         compressed_sectors_uncompressed << 9);
316 }
317
318 static ssize_t bch2_new_stripes(struct bch_fs *c, char *buf)
319 {
320         char *out = buf, *end = buf + PAGE_SIZE;
321         struct ec_stripe_head *h;
322         struct ec_stripe_new *s;
323
324         mutex_lock(&c->ec_new_stripe_lock);
325         list_for_each_entry(h, &c->ec_new_stripe_list, list) {
326                 out += scnprintf(out, end - out,
327                                  "target %u algo %u redundancy %u:\n",
328                                  h->target, h->algo, h->redundancy);
329
330                 if (h->s)
331                         out += scnprintf(out, end - out,
332                                          "\tpending: blocks %u allocated %u\n",
333                                          h->s->blocks.nr,
334                                          bitmap_weight(h->s->blocks_allocated,
335                                                        h->s->blocks.nr));
336
337                 mutex_lock(&h->lock);
338                 list_for_each_entry(s, &h->stripes, list)
339                         out += scnprintf(out, end - out,
340                                          "\tin flight: blocks %u allocated %u pin %u\n",
341                                          s->blocks.nr,
342                                          bitmap_weight(s->blocks_allocated,
343                                                        s->blocks.nr),
344                                          atomic_read(&s->pin));
345                 mutex_unlock(&h->lock);
346
347         }
348         mutex_unlock(&c->ec_new_stripe_lock);
349
350         return out - buf;
351 }
352
/*
 * Show handler for the filesystem kobject (and, via bch2_fs_internal_show(),
 * for the "internal" directory as well).
 *
 * Every sysfs_print*() line below is a hidden "if (attr == &sysfs_<name>)
 * return ...;", so the function is one long dispatch on @attr: the first
 * matching entry formats into @buf and returns the length. Returns 0 if
 * no entry matches.
 */
353 SHOW(bch2_fs)
354 {
355         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
356
357         sysfs_print(minor,                      c->minor);
358         sysfs_printf(internal_uuid, "%pU",      c->sb.uuid.b);
359
360         sysfs_print(journal_write_delay_ms,     c->journal.write_delay_ms);
361         sysfs_print(journal_reclaim_delay_ms,   c->journal.reclaim_delay_ms);
362
363         sysfs_print(block_size,                 block_bytes(c));
364         sysfs_print(btree_node_size,            btree_bytes(c));
365         sysfs_hprint(btree_cache_size,          bch2_btree_cache_size(c));
366
367         sysfs_print(read_realloc_races,
368                     atomic_long_read(&c->read_realloc_races));
369         sysfs_print(extent_migrate_done,
370                     atomic_long_read(&c->extent_migrate_done));
371         sysfs_print(extent_migrate_raced,
372                     atomic_long_read(&c->extent_migrate_raced));
373
374         sysfs_printf(btree_gc_periodic, "%u",   (int) c->btree_gc_periodic);
375
376         sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
377
378         sysfs_print(pd_controllers_update_seconds,
379                     c->pd_controllers_update_seconds);
380
381         sysfs_printf(rebalance_enabled,         "%i", c->rebalance.enabled);
382         sysfs_pd_controller_show(rebalance,     &c->rebalance.pd); /* XXX */
383
384         if (attr == &sysfs_rebalance_work)
385                 return bch2_rebalance_work_show(c, buf);
386
387         sysfs_print(promote_whole_extents,      c->promote_whole_extents);
388
389         sysfs_printf(meta_replicas_have, "%i",  bch2_replicas_online(c, true));
390         sysfs_printf(data_replicas_have, "%i",  bch2_replicas_online(c, false));
391
392         /* Debugging: */
393
394         if (attr == &sysfs_alloc_debug)
395                 return show_fs_alloc_debug(c, buf);
396
397         if (attr == &sysfs_journal_debug)
398                 return bch2_journal_print_debug(&c->journal, buf);
399
400         if (attr == &sysfs_journal_pins)
401                 return bch2_journal_print_pins(&c->journal, buf);
402
403         if (attr == &sysfs_btree_updates)
404                 return bch2_btree_updates_print(c, buf);
405
406         if (attr == &sysfs_dirty_btree_nodes)
407                 return bch2_dirty_btree_nodes_print(c, buf);
408
409         if (attr == &sysfs_compression_stats)
410                 return bch2_compression_stats(c, buf);
411
412         if (attr == &sysfs_new_stripes)
413                 return bch2_new_stripes(c, buf);
414
        /* One sysfs_print() per BCH_DEBUG_PARAMS() entry, reading c-><name>. */
415 #define BCH_DEBUG_PARAM(name, description) sysfs_print(name, c->name);
416         BCH_DEBUG_PARAMS()
417 #undef BCH_DEBUG_PARAM
418
419         return 0;
420 }
421
422 STORE(__bch2_fs)
423 {
424         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
425
426         sysfs_strtoul(journal_write_delay_ms, c->journal.write_delay_ms);
427         sysfs_strtoul(journal_reclaim_delay_ms, c->journal.reclaim_delay_ms);
428
429         if (attr == &sysfs_btree_gc_periodic) {
430                 ssize_t ret = strtoul_safe(buf, c->btree_gc_periodic)
431                         ?: (ssize_t) size;
432
433                 wake_up_process(c->gc_thread);
434                 return ret;
435         }
436
437         if (attr == &sysfs_copy_gc_enabled) {
438                 struct bch_dev *ca;
439                 unsigned i;
440                 ssize_t ret = strtoul_safe(buf, c->copy_gc_enabled)
441                         ?: (ssize_t) size;
442
443                 for_each_member_device(ca, c, i)
444                         if (ca->copygc_thread)
445                                 wake_up_process(ca->copygc_thread);
446                 return ret;
447         }
448
449         if (attr == &sysfs_rebalance_enabled) {
450                 ssize_t ret = strtoul_safe(buf, c->rebalance.enabled)
451                         ?: (ssize_t) size;
452
453                 rebalance_wakeup(c);
454                 return ret;
455         }
456
457         sysfs_strtoul(pd_controllers_update_seconds,
458                       c->pd_controllers_update_seconds);
459         sysfs_pd_controller_store(rebalance,    &c->rebalance.pd);
460
461         sysfs_strtoul(promote_whole_extents,    c->promote_whole_extents);
462
463         /* Debugging: */
464
465 #define BCH_DEBUG_PARAM(name, description) sysfs_strtoul(name, c->name);
466         BCH_DEBUG_PARAMS()
467 #undef BCH_DEBUG_PARAM
468
469         if (!bch2_fs_running(c))
470                 return -EPERM;
471
472         /* Debugging: */
473
474         if (attr == &sysfs_trigger_journal_flush)
475                 bch2_journal_meta_async(&c->journal, NULL);
476
477         if (attr == &sysfs_trigger_btree_coalesce)
478                 bch2_coalesce(c);
479
480         if (attr == &sysfs_trigger_gc)
481                 bch2_gc(c, NULL, false);
482
483         if (attr == &sysfs_prune_cache) {
484                 struct shrink_control sc;
485
486                 sc.gfp_mask = GFP_KERNEL;
487                 sc.nr_to_scan = strtoul_or_return(buf);
488                 c->btree_cache.shrink.scan_objects(&c->btree_cache.shrink, &sc);
489         }
490 #ifdef CONFIG_BCACHEFS_TESTS
491         if (attr == &sysfs_perf_test) {
492                 char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
493                 char *test              = strsep(&p, " \t\n");
494                 char *nr_str            = strsep(&p, " \t\n");
495                 char *threads_str       = strsep(&p, " \t\n");
496                 unsigned threads;
497                 u64 nr;
498                 int ret = -EINVAL;
499
500                 if (threads_str &&
501                     !(ret = kstrtouint(threads_str, 10, &threads)) &&
502                     !(ret = bch2_strtoull_h(nr_str, &nr)))
503                         bch2_btree_perf_test(c, test, nr, threads);
504                 else
505                         size = ret;
506                 kfree(tmp);
507         }
508 #endif
509         return size;
510 }
511
512 STORE(bch2_fs)
513 {
514         struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
515
516         mutex_lock(&c->state_lock);
517         size = __bch2_fs_store(kobj, attr, buf, size);
518         mutex_unlock(&c->state_lock);
519
520         return size;
521 }
522 SYSFS_OPS(bch2_fs);
523
/*
 * NULL-terminated list of attributes for the filesystem kobject; these are
 * served by bch2_fs_show() / bch2_fs_store(). perf_test is only present when
 * CONFIG_BCACHEFS_TESTS is enabled.
 */
524 struct attribute *bch2_fs_files[] = {
525         &sysfs_minor,
526         &sysfs_block_size,
527         &sysfs_btree_node_size,
528         &sysfs_btree_cache_size,
529
530         &sysfs_meta_replicas_have,
531         &sysfs_data_replicas_have,
532
533         &sysfs_journal_write_delay_ms,
534         &sysfs_journal_reclaim_delay_ms,
535
536         &sysfs_promote_whole_extents,
537
538         &sysfs_compression_stats,
539
540 #ifdef CONFIG_BCACHEFS_TESTS
541         &sysfs_perf_test,
542 #endif
543         NULL
544 };
545
546 /* internal dir - just a wrapper */
547
548 SHOW(bch2_fs_internal)
549 {
550         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
551         return bch2_fs_show(&c->kobj, attr, buf);
552 }
553
554 STORE(bch2_fs_internal)
555 {
556         struct bch_fs *c = container_of(kobj, struct bch_fs, internal);
557         return bch2_fs_store(&c->kobj, attr, buf, size);
558 }
559 SYSFS_OPS(bch2_fs_internal);
560
/*
 * NULL-terminated list of attributes for the "internal" directory. Requests
 * for these are forwarded to bch2_fs_show() / bch2_fs_store() by the
 * bch2_fs_internal wrappers. Includes one entry per BCH_DEBUG_PARAMS() item;
 * sysfs_pd_controller_files() is defined outside this file.
 */
561 struct attribute *bch2_fs_internal_files[] = {
562         &sysfs_alloc_debug,
563         &sysfs_journal_debug,
564         &sysfs_journal_pins,
565         &sysfs_btree_updates,
566         &sysfs_dirty_btree_nodes,
567
568         &sysfs_read_realloc_races,
569         &sysfs_extent_migrate_done,
570         &sysfs_extent_migrate_raced,
571
572         &sysfs_trigger_journal_flush,
573         &sysfs_trigger_btree_coalesce,
574         &sysfs_trigger_gc,
575         &sysfs_prune_cache,
576
577         &sysfs_copy_gc_enabled,
578
579         &sysfs_rebalance_enabled,
580         &sysfs_rebalance_work,
581         sysfs_pd_controller_files(rebalance),
582
583         &sysfs_new_stripes,
584
585         &sysfs_internal_uuid,
586
587 #define BCH_DEBUG_PARAM(name, description) &sysfs_##name,
588         BCH_DEBUG_PARAMS()
589 #undef BCH_DEBUG_PARAM
590
591         NULL
592 };
593
594 /* options */
595
596 SHOW(bch2_fs_opts_dir)
597 {
598         struct printbuf out = _PBUF(buf, PAGE_SIZE);
599         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
600         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
601         int id = opt - bch2_opt_table;
602         u64 v = bch2_opt_get_by_id(&c->opts, id);
603
604         bch2_opt_to_text(&out, c, opt, v, OPT_SHOW_FULL_LIST);
605         pr_buf(&out, "\n");
606
607         return out.pos - buf;
608 }
609
610 STORE(bch2_fs_opts_dir)
611 {
612         struct bch_fs *c = container_of(kobj, struct bch_fs, opts_dir);
613         const struct bch_option *opt = container_of(attr, struct bch_option, attr);
614         int ret, id = opt - bch2_opt_table;
615         char *tmp;
616         u64 v;
617
618         tmp = kstrdup(buf, GFP_KERNEL);
619         if (!tmp)
620                 return -ENOMEM;
621
622         ret = bch2_opt_parse(c, opt, strim(tmp), &v);
623         kfree(tmp);
624
625         if (ret < 0)
626                 return ret;
627
628         ret = bch2_opt_check_may_set(c, id, v);
629         if (ret < 0)
630                 return ret;
631
632         if (opt->set_sb != SET_NO_SB_OPT) {
633                 mutex_lock(&c->sb_lock);
634                 opt->set_sb(c->disk_sb.sb, v);
635                 bch2_write_super(c);
636                 mutex_unlock(&c->sb_lock);
637         }
638
639         bch2_opt_set_by_id(&c->opts, id, v);
640
641         if ((id == Opt_background_target ||
642              id == Opt_background_compression) && v) {
643                 bch2_rebalance_add_work(c, S64_MAX);
644                 rebalance_wakeup(c);
645         }
646
647         return size;
648 }
649 SYSFS_OPS(bch2_fs_opts_dir);
650
/*
 * Empty on purpose: the option files are created one by one from
 * bch2_opt_table by bch2_opts_create_sysfs_files().
 */
651 struct attribute *bch2_fs_opts_dir_files[] = { NULL };
652
653 int bch2_opts_create_sysfs_files(struct kobject *kobj)
654 {
655         const struct bch_option *i;
656         int ret;
657
658         for (i = bch2_opt_table;
659              i < bch2_opt_table + bch2_opts_nr;
660              i++) {
661                 if (i->mode == OPT_INTERNAL)
662                         continue;
663
664                 ret = sysfs_create_file(kobj, &i->attr);
665                 if (ret)
666                         return ret;
667         }
668
669         return 0;
670 }
671
672 /* time stats */
673
/*
 * Show handler for the time_stats kobject. The x-macro expands to one
 * "if (attr == &sysfs_time_stat_<name>)" test per BCH_TIME_STATS() entry,
 * printing c->times[BCH_TIME_<name>] via bch2_time_stats_print().
 * Returns 0 if @attr matches none of them.
 */
674 SHOW(bch2_fs_time_stats)
675 {
676         struct bch_fs *c = container_of(kobj, struct bch_fs, time_stats);
677
678 #define x(name)                                         \
679         if (attr == &sysfs_time_stat_##name)                            \
680                 return bch2_time_stats_print(&c->times[BCH_TIME_##name],\
681                                              buf, PAGE_SIZE);
682         BCH_TIME_STATS()
683 #undef x
684
685         return 0;
686 }
687
/*
 * Store handler for the time_stats kobject: input is accepted and ignored
 * (the whole write is reported as consumed). It exists because SYSFS_OPS()
 * references both a _show and a _store handler.
 */
688 STORE(bch2_fs_time_stats)
689 {
690         return size;
691 }
692 SYSFS_OPS(bch2_fs_time_stats);
693
/*
 * NULL-terminated list of time_stats attributes: one entry per
 * BCH_TIME_STATS() item, generated by the x-macro.
 */
694 struct attribute *bch2_fs_time_stats_files[] = {
695 #define x(name)                                         \
696         &sysfs_time_stat_##name,
697         BCH_TIME_STATS()
698 #undef x
699         NULL
700 };
701
/*
 * Callback type used by show_quantiles(): maps one bucket of a device
 * (filesystem, device, bucket index, opaque private pointer) to the unsigned
 * value the quantiles are computed over.
 */
702 typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
703                                  size_t, void *);
704
705 static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
706                                   size_t b, void *private)
707 {
708         int rw = (private ? 1 : 0);
709
710         return bucket_last_io(c, bucket(ca, b), rw);
711 }
712
713 static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
714                                        size_t b, void *private)
715 {
716         struct bucket *g = bucket(ca, b);
717         return bucket_sectors_used(g->mark);
718 }
719
720 static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
721                                      size_t b, void *private)
722 {
723         return bucket_gc_gen(ca, b);
724 }
725
/*
 * Three-way comparison of two unsigned values, for use with sort().
 * Returns -1, 0 or 1 when *_l is less than, equal to or greater than *_r.
 */
static int unsigned_cmp(const void *_l, const void *_r)
{
	const unsigned *lhs = _l;
	const unsigned *rhs = _r;

	if (*lhs < *rhs)
		return -1;
	if (*lhs > *rhs)
		return 1;
	return 0;
}
733
734 static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
735                               char *buf, bucket_map_fn *fn, void *private)
736 {
737         size_t i, n;
738         /* Compute 31 quantiles */
739         unsigned q[31], *p;
740         ssize_t ret = 0;
741
742         down_read(&ca->bucket_lock);
743         n = ca->mi.nbuckets;
744
745         p = vzalloc(n * sizeof(unsigned));
746         if (!p) {
747                 up_read(&ca->bucket_lock);
748                 return -ENOMEM;
749         }
750
751         for (i = ca->mi.first_bucket; i < n; i++)
752                 p[i] = fn(c, ca, i, private);
753
754         sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
755         up_read(&ca->bucket_lock);
756
757         while (n &&
758                !p[n - 1])
759                 --n;
760
761         for (i = 0; i < ARRAY_SIZE(q); i++)
762                 q[i] = p[n * (i + 1) / (ARRAY_SIZE(q) + 1)];
763
764         vfree(p);
765
766         for (i = 0; i < ARRAY_SIZE(q); i++)
767                 ret += scnprintf(buf + ret, PAGE_SIZE - ret,
768                                  "%u ", q[i]);
769         buf[ret - 1] = '\n';
770
771         return ret;
772 }
773
774 static ssize_t show_reserve_stats(struct bch_dev *ca, char *buf)
775 {
776         struct printbuf out = _PBUF(buf, PAGE_SIZE);
777         enum alloc_reserve i;
778
779         spin_lock(&ca->freelist_lock);
780
781         pr_buf(&out, "free_inc:\t%zu\t%zu\n",
782                fifo_used(&ca->free_inc),
783                ca->free_inc.size);
784
785         for (i = 0; i < RESERVE_NR; i++)
786                 pr_buf(&out, "free[%u]:\t%zu\t%zu\n", i,
787                        fifo_used(&ca->free[i]),
788                        ca->free[i].size);
789
790         spin_unlock(&ca->freelist_lock);
791
792         return out.pos - buf;
793 }
794
/*
 * Format a snapshot of one device's allocator state into @buf:
 *  - used/size of the free_inc fifo and of the three free[] reserve fifos,
 *  - bucket counts and sector counts from bch2_dev_usage_read(),
 *  - the copygc threshold,
 *  - whether anything is queued on the filesystem's freelist_wait and
 *    open_buckets_wait lists, and the open bucket counters.
 *
 * "capacity" is nbuckets - first_bucket; "available" is that figure minus
 * stats.buckets_unavailable.
 *
 * Unlike show_reserve_stats(), the fifos are read here without taking
 * freelist_lock.
 *
 * Returns the number of bytes written to @buf.
 */
795 static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
796 {
797         struct bch_fs *c = ca->fs;
798         struct bch_dev_usage stats = bch2_dev_usage_read(c, ca);
799
        /* The argument list below must stay in the same order as the format. */
800         return scnprintf(buf, PAGE_SIZE,
801                 "free_inc:               %zu/%zu\n"
802                 "free[RESERVE_BTREE]:    %zu/%zu\n"
803                 "free[RESERVE_MOVINGGC]: %zu/%zu\n"
804                 "free[RESERVE_NONE]:     %zu/%zu\n"
805                 "buckets:\n"
806                 "    capacity:           %llu\n"
807                 "    alloc:              %llu\n"
808                 "    sb:                 %llu\n"
809                 "    journal:            %llu\n"
810                 "    meta:               %llu\n"
811                 "    user:               %llu\n"
812                 "    cached:             %llu\n"
813                 "    erasure coded:      %llu\n"
814                 "    available:          %lli\n"
815                 "sectors:\n"
816                 "    sb:                 %llu\n"
817                 "    journal:            %llu\n"
818                 "    meta:               %llu\n"
819                 "    user:               %llu\n"
820                 "    cached:             %llu\n"
821                 "    fragmented:         %llu\n"
822                 "    copygc threshold:   %llu\n"
823                 "freelist_wait:          %s\n"
824                 "open buckets:           %u/%u (reserved %u)\n"
825                 "open_buckets_wait:      %s\n",
826                 fifo_used(&ca->free_inc),               ca->free_inc.size,
827                 fifo_used(&ca->free[RESERVE_BTREE]),    ca->free[RESERVE_BTREE].size,
828                 fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
829                 fifo_used(&ca->free[RESERVE_NONE]),     ca->free[RESERVE_NONE].size,
830                 ca->mi.nbuckets - ca->mi.first_bucket,
831                 stats.buckets_alloc,
832                 stats.buckets[BCH_DATA_SB],
833                 stats.buckets[BCH_DATA_JOURNAL],
834                 stats.buckets[BCH_DATA_BTREE],
835                 stats.buckets[BCH_DATA_USER],
836                 stats.buckets[BCH_DATA_CACHED],
837                 stats.buckets_ec,
838                 ca->mi.nbuckets - ca->mi.first_bucket - stats.buckets_unavailable,
839                 stats.sectors[BCH_DATA_SB],
840                 stats.sectors[BCH_DATA_JOURNAL],
841                 stats.sectors[BCH_DATA_BTREE],
842                 stats.sectors[BCH_DATA_USER],
843                 stats.sectors[BCH_DATA_CACHED],
844                 stats.sectors_fragmented,
845                 ca->copygc_threshold,
846                 c->freelist_wait.list.first             ? "waiting" : "empty",
847                 c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
848                 c->open_buckets_wait.list.first         ? "waiting" : "empty");
849 }
850
/*
 * Section headings for the two I/O directions reported by show_dev_iodone(),
 * indexed 0 ("read") and 1 ("write").  The final NULL terminates the list.
 */
static const char * const bch2_rw[] = { "read", "write", NULL };
856
857 static ssize_t show_dev_iodone(struct bch_dev *ca, char *buf)
858 {
859         struct printbuf out = _PBUF(buf, PAGE_SIZE);
860         int rw, i, cpu;
861
862         for (rw = 0; rw < 2; rw++) {
863                 pr_buf(&out, "%s:\n", bch2_rw[rw]);
864
865                 for (i = 1; i < BCH_DATA_NR; i++) {
866                         u64 n = 0;
867
868                         for_each_possible_cpu(cpu)
869                                 n += per_cpu_ptr(ca->io_done, cpu)->sectors[rw][i];
870
871                         pr_buf(&out, "%-12s:%12llu\n",
872                                bch2_data_types[i], n << 9);
873                 }
874         }
875
876         return out.pos - buf;
877 }
878
879 SHOW(bch2_dev)
880 {
881         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
882         struct bch_fs *c = ca->fs;
883         struct printbuf out = _PBUF(buf, PAGE_SIZE);
884
885         sysfs_printf(uuid,              "%pU\n", ca->uuid.b);
886
887         sysfs_print(bucket_size,        bucket_bytes(ca));
888         sysfs_print(block_size,         block_bytes(c));
889         sysfs_print(first_bucket,       ca->mi.first_bucket);
890         sysfs_print(nbuckets,           ca->mi.nbuckets);
891         sysfs_print(durability,         ca->mi.durability);
892         sysfs_print(discard,            ca->mi.discard);
893
894         if (attr == &sysfs_label) {
895                 if (ca->mi.group) {
896                         mutex_lock(&c->sb_lock);
897                         bch2_disk_path_to_text(&out, &c->disk_sb,
898                                                ca->mi.group - 1);
899                         mutex_unlock(&c->sb_lock);
900                 } else {
901                         pr_buf(&out, "none");
902                 }
903
904                 pr_buf(&out, "\n");
905                 return out.pos - buf;
906         }
907
908         if (attr == &sysfs_has_data) {
909                 bch2_flags_to_text(&out, bch2_data_types,
910                                    bch2_dev_has_data(c, ca));
911                 pr_buf(&out, "\n");
912                 return out.pos - buf;
913         }
914
915         sysfs_pd_controller_show(copy_gc, &ca->copygc_pd);
916
917         if (attr == &sysfs_cache_replacement_policy) {
918                 bch2_string_opt_to_text(&out,
919                                         bch2_cache_replacement_policies,
920                                         ca->mi.replacement);
921                 pr_buf(&out, "\n");
922                 return out.pos - buf;
923         }
924
925         if (attr == &sysfs_state_rw) {
926                 bch2_string_opt_to_text(&out, bch2_dev_state,
927                                         ca->mi.state);
928                 pr_buf(&out, "\n");
929                 return out.pos - buf;
930         }
931
932         if (attr == &sysfs_iodone)
933                 return show_dev_iodone(ca, buf);
934
935         sysfs_print(io_latency_read,            atomic64_read(&ca->cur_latency[READ]));
936         sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
937
938         if (attr == &sysfs_io_latency_stats_read)
939                 return bch2_time_stats_print(&ca->io_latency[READ], buf, PAGE_SIZE);
940         if (attr == &sysfs_io_latency_stats_write)
941                 return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
942
943         sysfs_printf(congested,                 "%u%%",
944                      clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
945                      * 100 / CONGESTED_MAX);
946
947         if (attr == &sysfs_bucket_quantiles_last_read)
948                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
949         if (attr == &sysfs_bucket_quantiles_last_write)
950                 return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
951         if (attr == &sysfs_bucket_quantiles_fragmentation)
952                 return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
953         if (attr == &sysfs_bucket_quantiles_oldest_gen)
954                 return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
955
956         if (attr == &sysfs_reserve_stats)
957                 return show_reserve_stats(ca, buf);
958         if (attr == &sysfs_alloc_debug)
959                 return show_dev_alloc_debug(ca, buf);
960
961         return 0;
962 }
963
964 STORE(bch2_dev)
965 {
966         struct bch_dev *ca = container_of(kobj, struct bch_dev, kobj);
967         struct bch_fs *c = ca->fs;
968         struct bch_member *mi;
969
970         sysfs_pd_controller_store(copy_gc, &ca->copygc_pd);
971
972         if (attr == &sysfs_discard) {
973                 bool v = strtoul_or_return(buf);
974
975                 mutex_lock(&c->sb_lock);
976                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
977
978                 if (v != BCH_MEMBER_DISCARD(mi)) {
979                         SET_BCH_MEMBER_DISCARD(mi, v);
980                         bch2_write_super(c);
981                 }
982                 mutex_unlock(&c->sb_lock);
983         }
984
985         if (attr == &sysfs_cache_replacement_policy) {
986                 ssize_t v = __sysfs_match_string(bch2_cache_replacement_policies, -1, buf);
987
988                 if (v < 0)
989                         return v;
990
991                 mutex_lock(&c->sb_lock);
992                 mi = &bch2_sb_get_members(c->disk_sb.sb)->members[ca->dev_idx];
993
994                 if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
995                         SET_BCH_MEMBER_REPLACEMENT(mi, v);
996                         bch2_write_super(c);
997                 }
998                 mutex_unlock(&c->sb_lock);
999         }
1000
1001         if (attr == &sysfs_label) {
1002                 char *tmp;
1003                 int ret;
1004
1005                 tmp = kstrdup(buf, GFP_KERNEL);
1006                 if (!tmp)
1007                         return -ENOMEM;
1008
1009                 ret = bch2_dev_group_set(c, ca, strim(tmp));
1010                 kfree(tmp);
1011                 if (ret)
1012                         return ret;
1013         }
1014
1015         if (attr == &sysfs_wake_allocator)
1016                 bch2_wake_allocator(ca);
1017
1018         return size;
1019 }
1020 SYSFS_OPS(bch2_dev);
1021
1022 struct attribute *bch2_dev_files[] = {
1023         &sysfs_uuid,
1024         &sysfs_bucket_size,
1025         &sysfs_block_size,
1026         &sysfs_first_bucket,
1027         &sysfs_nbuckets,
1028         &sysfs_durability,
1029
1030         /* settings: */
1031         &sysfs_discard,
1032         &sysfs_cache_replacement_policy,
1033         &sysfs_state_rw,
1034         &sysfs_label,
1035
1036         &sysfs_has_data,
1037         &sysfs_iodone,
1038
1039         &sysfs_io_latency_read,
1040         &sysfs_io_latency_write,
1041         &sysfs_io_latency_stats_read,
1042         &sysfs_io_latency_stats_write,
1043         &sysfs_congested,
1044
1045         /* alloc info - other stats: */
1046         &sysfs_bucket_quantiles_last_read,
1047         &sysfs_bucket_quantiles_last_write,
1048         &sysfs_bucket_quantiles_fragmentation,
1049         &sysfs_bucket_quantiles_oldest_gen,
1050
1051         &sysfs_reserve_stats,
1052
1053         /* debug: */
1054         &sysfs_alloc_debug,
1055         &sysfs_wake_allocator,
1056
1057         sysfs_pd_controller_files(copy_gc),
1058         NULL
1059 };
1060
#endif  /* NO_BCACHEFS_SYSFS */