null_blk: Fix missing mutex_destroy() at module removal
linux-block.git: drivers/block/null_blk/main.c
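The subject line refers to the module-scoped "lock" mutex declared near the top of this file. A minimal sketch of the pattern the fix describes -- not the literal patch, and assuming (as in mainline at the time) that the mutex is set up with mutex_init() in null_init() -- looks like:

	/* Sketch only: bodies elided; the real null_init()/null_exit() live later in this file. */
	static int __init null_init(void)
	{
		mutex_init(&lock);
		/* ... register the block major, create default devices, register configfs ... */
		return 0;
	}

	static void __exit null_exit(void)
	{
		/* ... unregister configfs, tear down devices, release the major ... */
		mutex_destroy(&lock);	/* the previously missing cleanup at module removal */
	}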
09c434b8 1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
4 * Shaohua Li <shli@fb.com>
5 */
f2298c04 6#include <linux/module.h>
fc1bc354 7
8#include <linux/moduleparam.h>
9#include <linux/sched.h>
10#include <linux/fs.h>
f2298c04 11#include <linux/init.h>
6dad38d3 12#include "null_blk.h"
f2298c04 13
14#undef pr_fmt
15#define pr_fmt(fmt) "null_blk: " fmt
16
17#define FREE_BATCH 16
18
19#define TICKS_PER_SEC 50ULL
20#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
21
33f782c4 22#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
93b57046 23static DECLARE_FAULT_ATTR(null_timeout_attr);
24941b90 24static DECLARE_FAULT_ATTR(null_requeue_attr);
596444e7 25static DECLARE_FAULT_ATTR(null_init_hctx_attr);
33f782c4 26#endif
93b57046 27
28static inline u64 mb_per_tick(int mbps)
29{
30 return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
31}
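/*
 * Illustrative example, not part of the driver: mb_per_tick() turns the
 * configured MiB/s limit into a per-tick byte budget. With TICKS_PER_SEC = 50
 * (one tick every 20 ms) and mbps = 100, the budget is (1 << 20) / 50 * 100,
 * i.e. roughly 2 MiB per tick, which the bandwidth timer refills each interval.
 */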
f2298c04 32
33/*
34 * Status flags for nullb_device.
35 *
36 * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
37 * UP: Device is currently on and visible in userspace.
eff2c4f1 38 * THROTTLED: Device is being throttled.
deb78b41 39 * CACHE: Device is using a write-back cache.
40 */
41enum nullb_device_flags {
42 NULLB_DEV_FL_CONFIGURED = 0,
43 NULLB_DEV_FL_UP = 1,
eff2c4f1 44 NULLB_DEV_FL_THROTTLED = 2,
deb78b41 45 NULLB_DEV_FL_CACHE = 3,
46};
47
66231ad3 48#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
49/*
50 * nullb_page is a page in memory for nullb devices.
51 *
52 * @page: The page holding the data.
53 * @bitmap: The bitmap represents which sectors in the page have data.
54 * Each bit represents one block size. For example, sector 8
55 * will use the 7th bit.
56 * The highest 2 bits of the bitmap are used for special purposes. LOCK means
57 * the cache page is being flushed to storage. FREE means the cache page is
58 * freed and should be skipped when flushing to storage. Please see
59 * null_make_cache_space().
60 */
61struct nullb_page {
62 struct page *page;
66231ad3 63 DECLARE_BITMAP(bitmap, MAP_SZ);
5bcd0e0c 64};
65#define NULLB_PAGE_LOCK (MAP_SZ - 1)
66#define NULLB_PAGE_FREE (MAP_SZ - 2)
5bcd0e0c 67
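/*
 * Illustrative example, not part of the driver: with 4 KiB pages and 512-byte
 * sectors, MAP_SZ is (4096 >> 9) + 2 = 10. Bits 0..7 track the data sectors of
 * the page (bit index = sector & SECTOR_MASK), bit 8 is NULLB_PAGE_FREE and
 * bit 9 is NULLB_PAGE_LOCK.
 */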
68static LIST_HEAD(nullb_list);
69static struct mutex lock;
70static int null_major;
94bc02e3 71static DEFINE_IDA(nullb_indexes);
82f402fe 72static struct blk_mq_tag_set tag_set;
f2298c04 73
74enum {
75 NULL_IRQ_NONE = 0,
76 NULL_IRQ_SOFTIRQ = 1,
77 NULL_IRQ_TIMER = 2,
ce2c350b 78};
f2298c04 79
80static bool g_virt_boundary = false;
81module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
82MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
83
b3cffc38 84static int g_no_sched;
5657a819 85module_param_named(no_sched, g_no_sched, int, 0444);
b3cffc38 86MODULE_PARM_DESC(no_sched, "No io scheduler");
87
2984c868 88static int g_submit_queues = 1;
5657a819 89module_param_named(submit_queues, g_submit_queues, int, 0444);
90MODULE_PARM_DESC(submit_queues, "Number of submission queues");
91
92static int g_poll_queues = 1;
93module_param_named(poll_queues, g_poll_queues, int, 0444);
94MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues");
95
2984c868 96static int g_home_node = NUMA_NO_NODE;
5657a819 97module_param_named(home_node, g_home_node, int, 0444);
98MODULE_PARM_DESC(home_node, "Home node for the device");
99
33f782c4 100#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
101/*
102 * For more details about fault injection, please refer to
103 * Documentation/fault-injection/fault-injection.rst.
104 */
93b57046 105static char g_timeout_str[80];
5657a819 106module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
290df92a 107MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
108
109static char g_requeue_str[80];
5657a819 110module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
290df92a 111MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
112
113static char g_init_hctx_str[80];
114module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
290df92a 115MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
33f782c4 116#endif
93b57046 117
118/*
119 * Historic queue modes.
120 *
121 * These days nothing but NULL_Q_MQ is actually supported, but we keep it the
122 * enum for error reporting.
123 */
124enum {
125 NULL_Q_BIO = 0,
126 NULL_Q_RQ = 1,
127 NULL_Q_MQ = 2,
128};
129
2984c868 130static int g_queue_mode = NULL_Q_MQ;
131
132static int null_param_store_val(const char *str, int *val, int min, int max)
133{
134 int ret, new_val;
135
136 ret = kstrtoint(str, 10, &new_val);
137 if (ret)
138 return -EINVAL;
139
140 if (new_val < min || new_val > max)
141 return -EINVAL;
142
143 *val = new_val;
144 return 0;
145}
146
147static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
148{
2984c868 149 return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
150}
151
9c27847d 152static const struct kernel_param_ops null_queue_mode_param_ops = {
153 .set = null_set_queue_mode,
154 .get = param_get_int,
155};
156
5657a819 157device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
54ae81cd 158MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
f2298c04 159
2984c868 160static int g_gb = 250;
5657a819 161module_param_named(gb, g_gb, int, 0444);
162MODULE_PARM_DESC(gb, "Size in GB");
163
2984c868 164static int g_bs = 512;
5657a819 165module_param_named(bs, g_bs, int, 0444);
166MODULE_PARM_DESC(bs, "Block size (in bytes)");
167
168static int g_max_sectors;
169module_param_named(max_sectors, g_max_sectors, int, 0444);
170MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
171
f7c4ce89 172static unsigned int nr_devices = 1;
701dfc42 173module_param(nr_devices, uint, 0444);
174MODULE_PARM_DESC(nr_devices, "Number of devices to register");
175
2984c868 176static bool g_blocking;
5657a819 177module_param_named(blocking, g_blocking, bool, 0444);
178MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
179
180static bool g_shared_tags;
181module_param_named(shared_tags, g_shared_tags, bool, 0444);
182MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
183
184static bool g_shared_tag_bitmap;
185module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
186MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
187
2984c868 188static int g_irqmode = NULL_IRQ_SOFTIRQ;
189
190static int null_set_irqmode(const char *str, const struct kernel_param *kp)
191{
2984c868 192 return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
193 NULL_IRQ_TIMER);
194}
195
9c27847d 196static const struct kernel_param_ops null_irqmode_param_ops = {
709c8667
MB
197 .set = null_set_irqmode,
198 .get = param_get_int,
199};
200
5657a819 201device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
202MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
203
2984c868 204static unsigned long g_completion_nsec = 10000;
5657a819 205module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
206MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
207
2984c868 208static int g_hw_queue_depth = 64;
5657a819 209module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
210MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
211
2984c868 212static bool g_use_per_node_hctx;
5657a819 213module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
20005244 214MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
f2298c04 215
216static bool g_memory_backed;
217module_param_named(memory_backed, g_memory_backed, bool, 0444);
218MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");
219
220static bool g_discard;
221module_param_named(discard, g_discard, bool, 0444);
222MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");
223
224static unsigned long g_cache_size;
225module_param_named(cache_size, g_cache_size, ulong, 0444);
226 MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");
227
228static bool g_fua = true;
229module_param_named(fua, g_fua, bool, 0444);
230 MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
231
232static unsigned int g_mbps;
233module_param_named(mbps, g_mbps, uint, 0444);
234MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
235
236static bool g_zoned;
237module_param_named(zoned, g_zoned, bool, S_IRUGO);
238 MODULE_PARM_DESC(zoned, "Make the device a host-managed zoned block device. Default: false");
239
240static unsigned long g_zone_size = 256;
241module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
242 MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be a power of two. Default: 256");
243
244static unsigned long g_zone_capacity;
245module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
246MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
247
248static unsigned int g_zone_nr_conv;
249module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
250MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
251
252static unsigned int g_zone_max_open;
253module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
254MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
255
256static unsigned int g_zone_max_active;
257module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
258MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
259
260static int g_zone_append_max_sectors = INT_MAX;
261module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
262MODULE_PARM_DESC(zone_append_max_sectors,
263 "Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
264
265static struct nullb_device *null_alloc_dev(void);
266static void null_free_dev(struct nullb_device *dev);
267static void null_del_dev(struct nullb *nullb);
268static int null_add_dev(struct nullb_device *dev);
49c3b926 269static struct nullb *null_find_dev_by_name(const char *name);
deb78b41 270static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
271
272static inline struct nullb_device *to_nullb_device(struct config_item *item)
273{
bb4c19e0 274 return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
275}
276
277static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
278{
279 return snprintf(page, PAGE_SIZE, "%u\n", val);
280}
281
282static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
283 char *page)
284{
285 return snprintf(page, PAGE_SIZE, "%lu\n", val);
286}
287
288static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
289{
290 return snprintf(page, PAGE_SIZE, "%u\n", val);
291}
292
293static ssize_t nullb_device_uint_attr_store(unsigned int *val,
294 const char *page, size_t count)
295{
296 unsigned int tmp;
297 int result;
298
299 result = kstrtouint(page, 0, &tmp);
45919fbf 300 if (result < 0)
301 return result;
302
303 *val = tmp;
304 return count;
305}
306
307static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
308 const char *page, size_t count)
309{
310 int result;
311 unsigned long tmp;
312
313 result = kstrtoul(page, 0, &tmp);
45919fbf 314 if (result < 0)
315 return result;
316
317 *val = tmp;
318 return count;
319}
320
321static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
322 size_t count)
323{
324 bool tmp;
325 int result;
326
327 result = kstrtobool(page, &tmp);
45919fbf 328 if (result < 0)
329 return result;
330
331 *val = tmp;
332 return count;
333}
334
335/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
336#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
337static ssize_t \
338nullb_device_##NAME##_show(struct config_item *item, char *page) \
339{ \
340 return nullb_device_##TYPE##_attr_show( \
341 to_nullb_device(item)->NAME, page); \
342} \
343static ssize_t \
344nullb_device_##NAME##_store(struct config_item *item, const char *page, \
345 size_t count) \
346{ \
347 int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
348 struct nullb_device *dev = to_nullb_device(item); \
b9853b4d 349 TYPE new_value = 0; \
350 int ret; \
351 \
352 ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
353 if (ret < 0) \
354 return ret; \
355 if (apply_fn) \
356 ret = apply_fn(dev, new_value); \
357 else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
358 ret = -EBUSY; \
359 if (ret < 0) \
360 return ret; \
361 dev->NAME = new_value; \
362 return count; \
363} \
364CONFIGFS_ATTR(nullb_device_, NAME);
365
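/*
 * Illustrative example, not part of the driver: NULLB_DEVICE_ATTR(size, ulong,
 * NULL) expands to nullb_device_size_show()/nullb_device_size_store() plus a
 * configfs attribute named "size". Attributes with a NULL apply function
 * reject writes once the device is configured (-EBUSY); attributes such as
 * submit_queues hand the new value to their apply callback instead. Assuming
 * configfs is mounted at /sys/kernel/config and a device group "nullb0" was
 * created, tuning looks like e.g.
 *	echo 4096 > /sys/kernel/config/nullb/nullb0/blocksize
 */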
366static int nullb_update_nr_hw_queues(struct nullb_device *dev,
367 unsigned int submit_queues,
368 unsigned int poll_queues)
369
45919fbf 370{
45919fbf 371 struct blk_mq_tag_set *set;
15dfc662 372 int ret, nr_hw_queues;
45919fbf 373
15dfc662 374 if (!dev->nullb)
375 return 0;
376
15dfc662 377 /*
2bfdbe8b 378 * Make sure at least one submit queue exists.
15dfc662 379 */
2bfdbe8b 380 if (!submit_queues)
381 return -EINVAL;
382
383 /*
384 * Make sure that null_init_hctx() does not access nullb->queues[] past
385 * the end of that array.
386 */
15dfc662 387 if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues)
78b10be2 388 return -EINVAL;
389
390 /*
391 * Keep previous and new queue numbers in nullb_device for reference in
392 * the callback function null_map_queues().
393 */
394 dev->prev_submit_queues = dev->submit_queues;
395 dev->prev_poll_queues = dev->poll_queues;
396 dev->submit_queues = submit_queues;
397 dev->poll_queues = poll_queues;
398
399 set = dev->nullb->tag_set;
400 nr_hw_queues = submit_queues + poll_queues;
401 blk_mq_update_nr_hw_queues(set, nr_hw_queues);
402 ret = set->nr_hw_queues == nr_hw_queues ? 0 : -ENOMEM;
403
404 if (ret) {
405 /* on error, revert the queue numbers */
406 dev->submit_queues = dev->prev_submit_queues;
407 dev->poll_queues = dev->prev_poll_queues;
408 }
409
410 return ret;
411}
412
413static int nullb_apply_submit_queues(struct nullb_device *dev,
414 unsigned int submit_queues)
415{
416 return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
417}
418
419static int nullb_apply_poll_queues(struct nullb_device *dev,
420 unsigned int poll_queues)
421{
422 return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
423}
424
425NULLB_DEVICE_ATTR(size, ulong, NULL);
426NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
427NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
15dfc662 428NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues);
429NULLB_DEVICE_ATTR(home_node, uint, NULL);
430NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
431NULLB_DEVICE_ATTR(blocksize, uint, NULL);
ea17fd35 432NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
433NULLB_DEVICE_ATTR(irqmode, uint, NULL);
434NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
435NULLB_DEVICE_ATTR(index, uint, NULL);
436NULLB_DEVICE_ATTR(blocking, bool, NULL);
437NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
438NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
439NULLB_DEVICE_ATTR(discard, bool, NULL);
440NULLB_DEVICE_ATTR(mbps, uint, NULL);
441NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
442NULLB_DEVICE_ATTR(zoned, bool, NULL);
443NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
089565fb 444NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
45919fbf 445NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
446NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
447NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
997a1f08 448NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
cee1b215 449NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
7012eef5 450NULLB_DEVICE_ATTR(no_sched, bool, NULL);
14509b74 451NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
7012eef5 452NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
f4f84586 453NULLB_DEVICE_ATTR(fua, bool, NULL);
3bf2bd20 454
455static ssize_t nullb_device_power_show(struct config_item *item, char *page)
456{
457 return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
458}
459
460static ssize_t nullb_device_power_store(struct config_item *item,
461 const char *page, size_t count)
462{
463 struct nullb_device *dev = to_nullb_device(item);
464 bool newp = false;
465 ssize_t ret;
466
467 ret = nullb_device_bool_attr_store(&newp, page, count);
468 if (ret < 0)
469 return ret;
470
471 if (!dev->power && newp) {
472 if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
473 return count;
474 ret = null_add_dev(dev);
475 if (ret) {
cedcafad 476 clear_bit(NULLB_DEV_FL_UP, &dev->flags);
a75110c3 477 return ret;
478 }
479
480 set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
481 dev->power = newp;
b3c30512 482 } else if (dev->power && !newp) {
483 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
484 mutex_lock(&lock);
485 dev->power = newp;
486 null_del_dev(dev->nullb);
487 mutex_unlock(&lock);
488 }
00a8cdb8 489 clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
490 }
491
492 return count;
493}
494
495CONFIGFS_ATTR(nullb_device_, power);
496
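/*
 * Illustrative example, not part of the driver: writing "1" to the power
 * attribute (e.g. echo 1 > /sys/kernel/config/nullb/nullb0/power, path
 * assuming the usual configfs mount) calls null_add_dev() and marks the
 * device CONFIGURED/UP; writing "0" tears the disk down again via
 * null_del_dev() under the global lock.
 */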
497static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
498{
499 struct nullb_device *t_dev = to_nullb_device(item);
500
501 return badblocks_show(&t_dev->badblocks, page, 0);
502}
503
504static ssize_t nullb_device_badblocks_store(struct config_item *item,
505 const char *page, size_t count)
506{
507 struct nullb_device *t_dev = to_nullb_device(item);
508 char *orig, *buf, *tmp;
509 u64 start, end;
510 int ret;
511
512 orig = kstrndup(page, count, GFP_KERNEL);
513 if (!orig)
514 return -ENOMEM;
515
516 buf = strstrip(orig);
517
518 ret = -EINVAL;
519 if (buf[0] != '+' && buf[0] != '-')
520 goto out;
521 tmp = strchr(&buf[1], '-');
522 if (!tmp)
523 goto out;
524 *tmp = '\0';
525 ret = kstrtoull(buf + 1, 0, &start);
526 if (ret)
527 goto out;
528 ret = kstrtoull(tmp + 1, 0, &end);
529 if (ret)
530 goto out;
531 ret = -EINVAL;
532 if (start > end)
533 goto out;
534 /* enable badblocks */
535 cmpxchg(&t_dev->badblocks.shift, -1, 0);
536 if (buf[0] == '+')
537 ret = badblocks_set(&t_dev->badblocks, start,
538 end - start + 1, 1);
539 else
540 ret = badblocks_clear(&t_dev->badblocks, start,
541 end - start + 1);
542 if (ret == 0)
543 ret = count;
544out:
545 kfree(orig);
546 return ret;
547}
548CONFIGFS_ATTR(nullb_device_, badblocks);
549
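/*
 * Illustrative example, not part of the driver: the badblocks store format is
 * "+<first>-<last>" to mark an inclusive sector range bad and
 * "-<first>-<last>" to clear it again, e.g.
 *	echo "+0-1023" > /sys/kernel/config/nullb/nullb0/badblocks
 */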
550static ssize_t nullb_device_zone_readonly_store(struct config_item *item,
551 const char *page, size_t count)
552{
553 struct nullb_device *dev = to_nullb_device(item);
554
555 return zone_cond_store(dev, page, count, BLK_ZONE_COND_READONLY);
556}
557CONFIGFS_ATTR_WO(nullb_device_, zone_readonly);
558
559static ssize_t nullb_device_zone_offline_store(struct config_item *item,
560 const char *page, size_t count)
561{
562 struct nullb_device *dev = to_nullb_device(item);
563
564 return zone_cond_store(dev, page, count, BLK_ZONE_COND_OFFLINE);
565}
566CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
567
568static struct configfs_attribute *nullb_device_attrs[] = {
569 &nullb_device_attr_size,
570 &nullb_device_attr_completion_nsec,
571 &nullb_device_attr_submit_queues,
0a593fbb 572 &nullb_device_attr_poll_queues,
573 &nullb_device_attr_home_node,
574 &nullb_device_attr_queue_mode,
575 &nullb_device_attr_blocksize,
ea17fd35 576 &nullb_device_attr_max_sectors,
577 &nullb_device_attr_irqmode,
578 &nullb_device_attr_hw_queue_depth,
cedcafad 579 &nullb_device_attr_index,
580 &nullb_device_attr_blocking,
581 &nullb_device_attr_use_per_node_hctx,
cedcafad 582 &nullb_device_attr_power,
5bcd0e0c 583 &nullb_device_attr_memory_backed,
306eb6b4 584 &nullb_device_attr_discard,
eff2c4f1 585 &nullb_device_attr_mbps,
deb78b41 586 &nullb_device_attr_cache_size,
2f54a613 587 &nullb_device_attr_badblocks,
588 &nullb_device_attr_zoned,
589 &nullb_device_attr_zone_size,
089565fb 590 &nullb_device_attr_zone_capacity,
ea2c18e1 591 &nullb_device_attr_zone_nr_conv,
592 &nullb_device_attr_zone_max_open,
593 &nullb_device_attr_zone_max_active,
997a1f08 594 &nullb_device_attr_zone_append_max_sectors,
595 &nullb_device_attr_zone_readonly,
596 &nullb_device_attr_zone_offline,
cee1b215 597 &nullb_device_attr_virt_boundary,
7012eef5 598 &nullb_device_attr_no_sched,
14509b74 599 &nullb_device_attr_shared_tags,
7012eef5 600 &nullb_device_attr_shared_tag_bitmap,
f4f84586 601 &nullb_device_attr_fua,
602 NULL,
603};
604
605static void nullb_device_release(struct config_item *item)
606{
607 struct nullb_device *dev = to_nullb_device(item);
608
deb78b41 609 null_free_device_storage(dev, false);
5bcd0e0c 610 null_free_dev(dev);
611}
612
613static struct configfs_item_operations nullb_device_ops = {
614 .release = nullb_device_release,
615};
616
e1919dff 617static const struct config_item_type nullb_device_type = {
618 .ct_item_ops = &nullb_device_ops,
619 .ct_attrs = nullb_device_attrs,
620 .ct_owner = THIS_MODULE,
621};
622
623#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
624
625static void nullb_add_fault_config(struct nullb_device *dev)
626{
627 fault_config_init(&dev->timeout_config, "timeout_inject");
628 fault_config_init(&dev->requeue_config, "requeue_inject");
629 fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject");
630
631 configfs_add_default_group(&dev->timeout_config.group, &dev->group);
632 configfs_add_default_group(&dev->requeue_config.group, &dev->group);
633 configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group);
634}
635
636#else
637
638static void nullb_add_fault_config(struct nullb_device *dev)
639{
640}
641
642#endif
643
3bf2bd20 644static struct
bb4c19e0 645config_group *nullb_group_make_group(struct config_group *group, const char *name)
646{
647 struct nullb_device *dev;
648
649 if (null_find_dev_by_name(name))
650 return ERR_PTR(-EEXIST);
651
652 dev = null_alloc_dev();
653 if (!dev)
654 return ERR_PTR(-ENOMEM);
655
656 config_group_init_type_name(&dev->group, name, &nullb_device_type);
657 nullb_add_fault_config(dev);
3bf2bd20 658
bb4c19e0 659 return &dev->group;
660}
661
662static void
663nullb_group_drop_item(struct config_group *group, struct config_item *item)
664{
665 struct nullb_device *dev = to_nullb_device(item);
666
667 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
668 mutex_lock(&lock);
669 dev->power = false;
670 null_del_dev(dev->nullb);
671 mutex_unlock(&lock);
672 }
673
674 config_item_put(item);
675}
676
677static ssize_t memb_group_features_show(struct config_item *item, char *page)
678{
089565fb 679 return snprintf(page, PAGE_SIZE,
f4f84586 680 "badblocks,blocking,blocksize,cache_size,fua,"
681 "completion_nsec,discard,home_node,hw_queue_depth,"
682 "irqmode,max_sectors,mbps,memory_backed,no_sched,"
683 "poll_queues,power,queue_mode,shared_tag_bitmap,"
684 "shared_tags,size,submit_queues,use_per_node_hctx,"
685 "virt_boundary,zoned,zone_capacity,zone_max_active,"
686 "zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
997a1f08 687 "zone_size,zone_append_max_sectors\n");
688}
689
690CONFIGFS_ATTR_RO(memb_group_, features);
691
692static struct configfs_attribute *nullb_group_attrs[] = {
693 &memb_group_attr_features,
694 NULL,
695};
696
697static struct configfs_group_operations nullb_group_ops = {
bb4c19e0 698 .make_group = nullb_group_make_group,
699 .drop_item = nullb_group_drop_item,
700};
701
e1919dff 702static const struct config_item_type nullb_group_type = {
703 .ct_group_ops = &nullb_group_ops,
704 .ct_attrs = nullb_group_attrs,
705 .ct_owner = THIS_MODULE,
706};
707
708static struct configfs_subsystem nullb_subsys = {
709 .su_group = {
710 .cg_item = {
711 .ci_namebuf = "nullb",
712 .ci_type = &nullb_group_type,
713 },
714 },
715};
716
717static inline int null_cache_active(struct nullb *nullb)
718{
719 return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
720}
721
722static struct nullb_device *null_alloc_dev(void)
723{
724 struct nullb_device *dev;
725
726 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
727 if (!dev)
728 return NULL;
729
730#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
731 dev->timeout_config.attr = null_timeout_attr;
732 dev->requeue_config.attr = null_requeue_attr;
733 dev->init_hctx_fault_config.attr = null_init_hctx_attr;
734#endif
735
5bcd0e0c 736 INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
deb78b41 737 INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
738 if (badblocks_init(&dev->badblocks, 0)) {
739 kfree(dev);
740 return NULL;
741 }
742
743 dev->size = g_gb * 1024;
744 dev->completion_nsec = g_completion_nsec;
745 dev->submit_queues = g_submit_queues;
15dfc662 746 dev->prev_submit_queues = g_submit_queues;
0a593fbb 747 dev->poll_queues = g_poll_queues;
15dfc662 748 dev->prev_poll_queues = g_poll_queues;
749 dev->home_node = g_home_node;
750 dev->queue_mode = g_queue_mode;
751 dev->blocksize = g_bs;
ea17fd35 752 dev->max_sectors = g_max_sectors;
753 dev->irqmode = g_irqmode;
754 dev->hw_queue_depth = g_hw_queue_depth;
2984c868 755 dev->blocking = g_blocking;
756 dev->memory_backed = g_memory_backed;
757 dev->discard = g_discard;
758 dev->cache_size = g_cache_size;
759 dev->mbps = g_mbps;
2984c868 760 dev->use_per_node_hctx = g_use_per_node_hctx;
761 dev->zoned = g_zoned;
762 dev->zone_size = g_zone_size;
089565fb 763 dev->zone_capacity = g_zone_capacity;
ea2c18e1 764 dev->zone_nr_conv = g_zone_nr_conv;
765 dev->zone_max_open = g_zone_max_open;
766 dev->zone_max_active = g_zone_max_active;
997a1f08 767 dev->zone_append_max_sectors = g_zone_append_max_sectors;
cee1b215 768 dev->virt_boundary = g_virt_boundary;
7012eef5 769 dev->no_sched = g_no_sched;
14509b74 770 dev->shared_tags = g_shared_tags;
7012eef5 771 dev->shared_tag_bitmap = g_shared_tag_bitmap;
772 dev->fua = g_fua;
773
774 return dev;
775}
776
777static void null_free_dev(struct nullb_device *dev)
778{
779 if (!dev)
780 return;
781
d205bde7 782 null_free_zoned_dev(dev);
1addb798 783 badblocks_exit(&dev->badblocks);
784 kfree(dev);
785}
786
787static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
788{
8b631f9c 789 struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
f2298c04 790
8b631f9c 791 blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
792 return HRTIMER_NORESTART;
793}
794
795static void null_cmd_end_timer(struct nullb_cmd *cmd)
796{
2984c868 797 ktime_t kt = cmd->nq->dev->completion_nsec;
f2298c04 798
3c395a96 799 hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
800}
801
49f66136 802static void null_complete_rq(struct request *rq)
f2298c04 803{
804 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
805
806 blk_mq_end_request(rq, cmd->error);
807}
808
c90b6b50 809static struct nullb_page *null_alloc_page(void)
810{
811 struct nullb_page *t_page;
812
c90b6b50 813 t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
5bcd0e0c 814 if (!t_page)
df00b1d2 815 return NULL;
5bcd0e0c 816
c90b6b50 817 t_page->page = alloc_pages(GFP_NOIO, 0);
818 if (!t_page->page) {
819 kfree(t_page);
820 return NULL;
821 }
5bcd0e0c 822
66231ad3 823 memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
5bcd0e0c 824 return t_page;
825}
826
827static void null_free_page(struct nullb_page *t_page)
828{
829 __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
830 if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
deb78b41 831 return;
832 __free_page(t_page->page);
833 kfree(t_page);
834}
835
836static bool null_page_empty(struct nullb_page *page)
837{
838 int size = MAP_SZ - 2;
839
840 return find_first_bit(page->bitmap, size) == size;
841}
842
843static void null_free_sector(struct nullb *nullb, sector_t sector,
844 bool is_cache)
845{
846 unsigned int sector_bit;
847 u64 idx;
848 struct nullb_page *t_page, *ret;
849 struct radix_tree_root *root;
850
deb78b41 851 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
852 idx = sector >> PAGE_SECTORS_SHIFT;
853 sector_bit = (sector & SECTOR_MASK);
854
855 t_page = radix_tree_lookup(root, idx);
856 if (t_page) {
66231ad3 857 __clear_bit(sector_bit, t_page->bitmap);
5bcd0e0c 858
66231ad3 859 if (null_page_empty(t_page)) {
860 ret = radix_tree_delete_item(root, idx, t_page);
861 WARN_ON(ret != t_page);
862 null_free_page(ret);
863 if (is_cache)
864 nullb->dev->curr_cache -= PAGE_SIZE;
865 }
866 }
867}
868
869static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
deb78b41 870 struct nullb_page *t_page, bool is_cache)
871{
872 struct radix_tree_root *root;
873
deb78b41 874 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
875
876 if (radix_tree_insert(root, idx, t_page)) {
877 null_free_page(t_page);
878 t_page = radix_tree_lookup(root, idx);
879 WARN_ON(!t_page || t_page->page->index != idx);
880 } else if (is_cache)
881 nullb->dev->curr_cache += PAGE_SIZE;
882
883 return t_page;
884}
885
deb78b41 886static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
887{
888 unsigned long pos = 0;
889 int nr_pages;
890 struct nullb_page *ret, *t_pages[FREE_BATCH];
891 struct radix_tree_root *root;
892
deb78b41 893 root = is_cache ? &dev->cache : &dev->data;
894
895 do {
896 int i;
897
898 nr_pages = radix_tree_gang_lookup(root,
899 (void **)t_pages, pos, FREE_BATCH);
900
901 for (i = 0; i < nr_pages; i++) {
902 pos = t_pages[i]->page->index;
903 ret = radix_tree_delete_item(root, pos, t_pages[i]);
904 WARN_ON(ret != t_pages[i]);
905 null_free_page(ret);
906 }
907
908 pos++;
909 } while (nr_pages == FREE_BATCH);
910
911 if (is_cache)
912 dev->curr_cache = 0;
913}
914
915static struct nullb_page *__null_lookup_page(struct nullb *nullb,
916 sector_t sector, bool for_write, bool is_cache)
917{
918 unsigned int sector_bit;
919 u64 idx;
920 struct nullb_page *t_page;
deb78b41 921 struct radix_tree_root *root;
922
923 idx = sector >> PAGE_SECTORS_SHIFT;
924 sector_bit = (sector & SECTOR_MASK);
925
926 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
927 t_page = radix_tree_lookup(root, idx);
928 WARN_ON(t_page && t_page->page->index != idx);
929
66231ad3 930 if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
931 return t_page;
932
933 return NULL;
934}
935
936static struct nullb_page *null_lookup_page(struct nullb *nullb,
937 sector_t sector, bool for_write, bool ignore_cache)
938{
939 struct nullb_page *page = NULL;
940
941 if (!ignore_cache)
942 page = __null_lookup_page(nullb, sector, for_write, true);
943 if (page)
944 return page;
945 return __null_lookup_page(nullb, sector, for_write, false);
946}
947
5bcd0e0c 948static struct nullb_page *null_insert_page(struct nullb *nullb,
949 sector_t sector, bool ignore_cache)
950 __releases(&nullb->lock)
951 __acquires(&nullb->lock)
952{
953 u64 idx;
954 struct nullb_page *t_page;
955
deb78b41 956 t_page = null_lookup_page(nullb, sector, true, ignore_cache);
957 if (t_page)
958 return t_page;
959
960 spin_unlock_irq(&nullb->lock);
961
c90b6b50 962 t_page = null_alloc_page();
963 if (!t_page)
964 goto out_lock;
965
966 if (radix_tree_preload(GFP_NOIO))
967 goto out_freepage;
968
969 spin_lock_irq(&nullb->lock);
970 idx = sector >> PAGE_SECTORS_SHIFT;
971 t_page->page->index = idx;
deb78b41 972 t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
973 radix_tree_preload_end();
974
975 return t_page;
976out_freepage:
977 null_free_page(t_page);
978out_lock:
979 spin_lock_irq(&nullb->lock);
980 return null_lookup_page(nullb, sector, true, ignore_cache);
981}
982
983static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
984{
985 int i;
986 unsigned int offset;
987 u64 idx;
988 struct nullb_page *t_page, *ret;
989 void *dst, *src;
990
991 idx = c_page->page->index;
992
993 t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
994
995 __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
996 if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
deb78b41 997 null_free_page(c_page);
66231ad3 998 if (t_page && null_page_empty(t_page)) {
999 ret = radix_tree_delete_item(&nullb->dev->data,
1000 idx, t_page);
1001 null_free_page(t_page);
1002 }
1003 return 0;
1004 }
1005
1006 if (!t_page)
1007 return -ENOMEM;
1008
1009 src = kmap_local_page(c_page->page);
1010 dst = kmap_local_page(t_page->page);
1011
1012 for (i = 0; i < PAGE_SECTORS;
1013 i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
66231ad3 1014 if (test_bit(i, c_page->bitmap)) {
1015 offset = (i << SECTOR_SHIFT);
1016 memcpy(dst + offset, src + offset,
1017 nullb->dev->blocksize);
66231ad3 1018 __set_bit(i, t_page->bitmap);
1019 }
1020 }
1021
1022 kunmap_local(dst);
1023 kunmap_local(src);
1024
1025 ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
1026 null_free_page(ret);
1027 nullb->dev->curr_cache -= PAGE_SIZE;
1028
1029 return 0;
1030}
1031
1032static int null_make_cache_space(struct nullb *nullb, unsigned long n)
f2298c04 1033{
1034 int i, err, nr_pages;
1035 struct nullb_page *c_pages[FREE_BATCH];
1036 unsigned long flushed = 0, one_round;
1037
1038again:
1039 if ((nullb->dev->cache_size * 1024 * 1024) >
1040 nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
1041 return 0;
1042
1043 nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
1044 (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
1045 /*
1046 * null_flush_cache_page() could unlock before using the c_pages. To
1047 * avoid the race, we don't allow these pages to be freed.
1048 */
1049 for (i = 0; i < nr_pages; i++) {
1050 nullb->cache_flush_pos = c_pages[i]->page->index;
1051 /*
1052 * We found the page which is being flushed to disk by other
1053 * threads
1054 */
66231ad3 1055 if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
1056 c_pages[i] = NULL;
1057 else
66231ad3 1058 __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
1059 }
1060
1061 one_round = 0;
1062 for (i = 0; i < nr_pages; i++) {
1063 if (c_pages[i] == NULL)
1064 continue;
1065 err = null_flush_cache_page(nullb, c_pages[i]);
1066 if (err)
1067 return err;
1068 one_round++;
1069 }
1070 flushed += one_round << PAGE_SHIFT;
1071
1072 if (n > flushed) {
1073 if (nr_pages == 0)
1074 nullb->cache_flush_pos = 0;
1075 if (one_round == 0) {
1076 /* give other threads a chance */
1077 spin_unlock_irq(&nullb->lock);
1078 spin_lock_irq(&nullb->lock);
1079 }
1080 goto again;
1081 }
1082 return 0;
1083}
1084
1085static int copy_to_nullb(struct nullb *nullb, struct page *source,
deb78b41 1086 unsigned int off, sector_t sector, size_t n, bool is_fua)
1087{
1088 size_t temp, count = 0;
1089 unsigned int offset;
1090 struct nullb_page *t_page;
1091
1092 while (count < n) {
1093 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1094
1095 if (null_cache_active(nullb) && !is_fua)
1096 null_make_cache_space(nullb, PAGE_SIZE);
1097
5bcd0e0c 1098 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
1099 t_page = null_insert_page(nullb, sector,
1100 !null_cache_active(nullb) || is_fua);
1101 if (!t_page)
1102 return -ENOSPC;
1103
fbb5615f 1104 memcpy_page(t_page->page, offset, source, off + count, temp);
5bcd0e0c 1105
66231ad3 1106 __set_bit(sector & SECTOR_MASK, t_page->bitmap);
5bcd0e0c 1107
1108 if (is_fua)
1109 null_free_sector(nullb, sector, true);
1110
1111 count += temp;
1112 sector += temp >> SECTOR_SHIFT;
1113 }
1114 return 0;
1115}
1116
1117static int copy_from_nullb(struct nullb *nullb, struct page *dest,
1118 unsigned int off, sector_t sector, size_t n)
1119{
1120 size_t temp, count = 0;
1121 unsigned int offset;
1122 struct nullb_page *t_page;
1123
1124 while (count < n) {
1125 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1126
1127 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
1128 t_page = null_lookup_page(nullb, sector, false,
1129 !null_cache_active(nullb));
5bcd0e0c 1130
1131 if (t_page)
1132 memcpy_page(dest, off + count, t_page->page, offset,
1133 temp);
1134 else
1135 zero_user(dest, off + count, temp);
1136
1137 count += temp;
1138 sector += temp >> SECTOR_SHIFT;
1139 }
1140 return 0;
1141}
1142
1143static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
1144 unsigned int len, unsigned int off)
1145{
fbb5615f 1146 memset_page(page, off, 0xff, len);
1147}
1148
1149blk_status_t null_handle_discard(struct nullb_device *dev,
1150 sector_t sector, sector_t nr_sectors)
306eb6b4 1151{
1152 struct nullb *nullb = dev->nullb;
1153 size_t n = nr_sectors << SECTOR_SHIFT;
1154 size_t temp;
1155
1156 spin_lock_irq(&nullb->lock);
1157 while (n > 0) {
49c7089f 1158 temp = min_t(size_t, n, dev->blocksize);
1159 null_free_sector(nullb, sector, false);
1160 if (null_cache_active(nullb))
1161 null_free_sector(nullb, sector, true);
1162 sector += temp >> SECTOR_SHIFT;
1163 n -= temp;
1164 }
1165 spin_unlock_irq(&nullb->lock);
1166
1167 return BLK_STS_OK;
1168}
1169
cb9e5273 1170static blk_status_t null_handle_flush(struct nullb *nullb)
1171{
1172 int err;
1173
1174 if (!null_cache_active(nullb))
1175 return 0;
1176
1177 spin_lock_irq(&nullb->lock);
1178 while (true) {
1179 err = null_make_cache_space(nullb,
1180 nullb->dev->cache_size * 1024 * 1024);
1181 if (err || nullb->dev->curr_cache == 0)
1182 break;
1183 }
1184
1185 WARN_ON(!radix_tree_empty(&nullb->dev->cache));
1186 spin_unlock_irq(&nullb->lock);
cb9e5273 1187 return errno_to_blk_status(err);
1188}
1189
5bcd0e0c 1190static int null_transfer(struct nullb *nullb, struct page *page,
1191 unsigned int len, unsigned int off, bool is_write, sector_t sector,
1192 bool is_fua)
f2298c04 1193{
1194 struct nullb_device *dev = nullb->dev;
1195 unsigned int valid_len = len;
1196 int err = 0;
1197
1198 if (!is_write) {
1199 if (dev->zoned)
1200 valid_len = null_zone_valid_read_len(nullb,
1201 sector, len);
1202
1203 if (valid_len) {
1204 err = copy_from_nullb(nullb, page, off,
1205 sector, valid_len);
1206 off += valid_len;
1207 len -= valid_len;
1208 }
1209
1210 if (len)
1211 nullb_fill_pattern(nullb, page, len, off);
1212 flush_dcache_page(page);
1213 } else {
1214 flush_dcache_page(page);
deb78b41 1215 err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
1216 }
1217
1218 return err;
1219}
1220
1221static int null_handle_rq(struct nullb_cmd *cmd)
1222{
8b631f9c 1223 struct request *rq = blk_mq_rq_from_pdu(cmd);
5bcd0e0c 1224 struct nullb *nullb = cmd->nq->dev->nullb;
cb9e5273 1225 int err = 0;
5bcd0e0c 1226 unsigned int len;
49c7089f 1227 sector_t sector = blk_rq_pos(rq);
1228 struct req_iterator iter;
1229 struct bio_vec bvec;
1230
1231 spin_lock_irq(&nullb->lock);
1232 rq_for_each_segment(bvec, rq, iter) {
1233 len = bvec.bv_len;
1234 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
deb78b41 1235 op_is_write(req_op(rq)), sector,
2d62e6b0 1236 rq->cmd_flags & REQ_FUA);
1237 if (err)
1238 break;
1239 sector += len >> SECTOR_SHIFT;
1240 }
1241 spin_unlock_irq(&nullb->lock);
1242
cb9e5273 1243 return errno_to_blk_status(err);
1244}
1245
1246static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
1247{
1248 struct nullb_device *dev = cmd->nq->dev;
1249 struct nullb *nullb = dev->nullb;
1250 blk_status_t sts = BLK_STS_OK;
8b631f9c 1251 struct request *rq = blk_mq_rq_from_pdu(cmd);
1252
1253 if (!hrtimer_active(&nullb->bw_timer))
1254 hrtimer_restart(&nullb->bw_timer);
1255
1256 if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
8b631f9c 1257 blk_mq_stop_hw_queues(nullb->q);
1258 /* race with timer */
1259 if (atomic_long_read(&nullb->cur_bytes) > 0)
8b631f9c 1260 blk_mq_start_stopped_hw_queues(nullb->q, true);
1261 /* requeue request */
1262 sts = BLK_STS_DEV_RESOURCE;
1263 }
1264 return sts;
1265}
1266
1267static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
1268 sector_t sector,
1269 sector_t nr_sectors)
1270{
1271 struct badblocks *bb = &cmd->nq->dev->badblocks;
1272 sector_t first_bad;
1273 int bad_sectors;
1274
1275 if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
1276 return BLK_STS_IOERR;
1277
1278 return BLK_STS_OK;
1279}
1280
7ea88e22 1281static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
ff07a02e 1282 enum req_op op,
1283 sector_t sector,
1284 sector_t nr_sectors)
1285{
1286 struct nullb_device *dev = cmd->nq->dev;
7ea88e22 1287
1288 if (op == REQ_OP_DISCARD)
1289 return null_handle_discard(dev, sector, nr_sectors);
1290
cb9e5273 1291 return null_handle_rq(cmd);
1292}
1293
1294static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
1295{
8b631f9c 1296 struct request *rq = blk_mq_rq_from_pdu(cmd);
1297 struct nullb_device *dev = cmd->nq->dev;
1298 struct bio *bio;
1299
1300 if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
1301 __rq_for_each_bio(bio, rq)
1302 zero_fill_bio(bio);
1303 }
1304}
1305
1306static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
1307{
1308 struct request *rq = blk_mq_rq_from_pdu(cmd);
1309
1310 /*
1311 * Since root privileges are required to configure the null_blk
1312 * driver, it is fine that this driver does not initialize the
1313 * data buffers of read commands. Zero-initialize these buffers
1314 * anyway if KMSAN is enabled to prevent KMSAN from complaining
1315 * about null_blk not initializing read data buffers.
1316 */
1317 if (IS_ENABLED(CONFIG_KMSAN))
1318 nullb_zero_read_cmd_buffer(cmd);
1319
1320 /* Complete IO by inline, softirq or timer */
1321 switch (cmd->nq->dev->irqmode) {
1322 case NULL_IRQ_SOFTIRQ:
8b631f9c 1323 blk_mq_complete_request(rq);
1324 break;
1325 case NULL_IRQ_NONE:
8b631f9c 1326 blk_mq_end_request(rq, cmd->error);
1327 break;
1328 case NULL_IRQ_TIMER:
1329 null_cmd_end_timer(cmd);
1330 break;
1331 }
1332}
1333
1334blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
1335 sector_t sector, unsigned int nr_sectors)
1336{
1337 struct nullb_device *dev = cmd->nq->dev;
1338 blk_status_t ret;
1339
1340 if (dev->badblocks.shift != -1) {
1341 ret = null_handle_badblocks(cmd, sector, nr_sectors);
1342 if (ret != BLK_STS_OK)
1343 return ret;
1344 }
1345
1346 if (dev->memory_backed)
49c7089f 1347 return null_handle_memory_backed(cmd, op, sector, nr_sectors);
1348
1349 return BLK_STS_OK;
1350}
1351
1352static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
1353 sector_t nr_sectors, enum req_op op)
1354{
1355 struct nullb_device *dev = cmd->nq->dev;
eff2c4f1 1356 struct nullb *nullb = dev->nullb;
adb84284 1357 blk_status_t sts;
5bcd0e0c 1358
d4b186ed 1359 if (op == REQ_OP_FLUSH) {
cb9e5273 1360 cmd->error = null_handle_flush(nullb);
1361 goto out;
1362 }
d4b186ed 1363
9dd44c7e 1364 if (dev->zoned)
de3510e5 1365 sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
9dd44c7e 1366 else
1367 sts = null_process_cmd(cmd, op, sector, nr_sectors);
1368
1369 /* Do not overwrite errors (e.g. timeout errors) */
1370 if (cmd->error == BLK_STS_OK)
1371 cmd->error = sts;
fceb5d1b 1372
2f54a613 1373out:
a3d7d674 1374 nullb_complete_cmd(cmd);
1375}
1376
1377static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
1378{
1379 struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
1380 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1381 unsigned int mbps = nullb->dev->mbps;
1382
1383 if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
1384 return HRTIMER_NORESTART;
1385
1386 atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
8b631f9c 1387 blk_mq_start_stopped_hw_queues(nullb->q, true);
1388
1389 hrtimer_forward_now(&nullb->bw_timer, timer_interval);
1390
1391 return HRTIMER_RESTART;
1392}
1393
1394static void nullb_setup_bwtimer(struct nullb *nullb)
1395{
1396 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1397
1398 hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1399 nullb->bw_timer.function = nullb_bwtimer_fn;
1400 atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
1401 hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
1402}
1403
1404#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1405
1406static bool should_timeout_request(struct request *rq)
1407{
1408 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1409 struct nullb_device *dev = cmd->nq->dev;
1410
1411 return should_fail(&dev->timeout_config.attr, 1);
1412}
1413
1414static bool should_requeue_request(struct request *rq)
1415{
1416 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1417 struct nullb_device *dev = cmd->nq->dev;
1418
1419 return should_fail(&dev->requeue_config.attr, 1);
1420}
1421
1422static bool should_init_hctx_fail(struct nullb_device *dev)
1423{
1424 return should_fail(&dev->init_hctx_fault_config.attr, 1);
1425}
1426
1427#else
1428
1429static bool should_timeout_request(struct request *rq)
1430{
1431 return false;
1432}
93b57046 1433
1434static bool should_requeue_request(struct request *rq)
1435{
1436 return false;
1437}
1438
1439static bool should_init_hctx_fail(struct nullb_device *dev)
1440{
1441 return false;
1442}
1443
1444#endif
1445
a4e1d0b7 1446static void null_map_queues(struct blk_mq_tag_set *set)
1447{
1448 struct nullb *nullb = set->driver_data;
1449 int i, qoff;
1450 unsigned int submit_queues = g_submit_queues;
1451 unsigned int poll_queues = g_poll_queues;
1452
1453 if (nullb) {
1454 struct nullb_device *dev = nullb->dev;
1455
1456 /*
1457 * Refer to nr_hw_queues of the tag set to check if the expected
1458 * number of hardware queues was prepared. If the block layer failed
1459 * to prepare them, use the previous numbers of submit queues and
1460 * poll queues to map queues.
1461 */
1462 if (set->nr_hw_queues ==
1463 dev->submit_queues + dev->poll_queues) {
1464 submit_queues = dev->submit_queues;
1465 poll_queues = dev->poll_queues;
1466 } else if (set->nr_hw_queues ==
1467 dev->prev_submit_queues + dev->prev_poll_queues) {
1468 submit_queues = dev->prev_submit_queues;
1469 poll_queues = dev->prev_poll_queues;
1470 } else {
1471 pr_warn("tag set has unexpected nr_hw_queues: %d\n",
1472 set->nr_hw_queues);
1473 WARN_ON_ONCE(true);
1474 submit_queues = 1;
1475 poll_queues = 0;
1476 }
1477 }
1478
1479 for (i = 0, qoff = 0; i < set->nr_maps; i++) {
1480 struct blk_mq_queue_map *map = &set->map[i];
1481
1482 switch (i) {
1483 case HCTX_TYPE_DEFAULT:
15dfc662 1484 map->nr_queues = submit_queues;
0a593fbb
JA
1485 break;
1486 case HCTX_TYPE_READ:
1487 map->nr_queues = 0;
1488 continue;
1489 case HCTX_TYPE_POLL:
15dfc662 1490 map->nr_queues = poll_queues;
0a593fbb
JA
1491 break;
1492 }
1493 map->queue_offset = qoff;
1494 qoff += map->nr_queues;
1495 blk_mq_map_queues(map);
1496 }
1497}
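/*
 * Illustrative example, not part of the driver: with submit_queues=4 and
 * poll_queues=2 the loop above maps HCTX_TYPE_DEFAULT to hardware queues 0-3,
 * leaves HCTX_TYPE_READ empty (reads fall back to the default map) and maps
 * HCTX_TYPE_POLL to hardware queues 4-5.
 */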
1498
1499static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
1500{
1501 struct nullb_queue *nq = hctx->driver_data;
1502 LIST_HEAD(list);
1503 int nr = 0;
5a26e45e 1504 struct request *rq;
1505
1506 spin_lock(&nq->poll_lock);
1507 list_splice_init(&nq->poll_list, &list);
1508 list_for_each_entry(rq, &list, queuelist)
1509 blk_mq_set_request_complete(rq);
1510 spin_unlock(&nq->poll_lock);
1511
1512 while (!list_empty(&list)) {
1513 struct nullb_cmd *cmd;
1514 struct request *req;
1515
1516 req = list_first_entry(&list, struct request, queuelist);
1517 list_del_init(&req->queuelist);
1518 cmd = blk_mq_rq_to_pdu(req);
1519 cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
1520 blk_rq_sectors(req));
c5eafd79 1521 if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
2385ebf3 1522 blk_mq_end_request_batch))
8b631f9c 1523 blk_mq_end_request(req, cmd->error);
1524 nr++;
1525 }
1526
1527 return nr;
1528}
1529
9bdb4833 1530static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
5448aca4 1531{
0a593fbb 1532 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
1533 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1534
1535 if (hctx->type == HCTX_TYPE_POLL) {
1536 struct nullb_queue *nq = hctx->driver_data;
1537
1538 spin_lock(&nq->poll_lock);
1539 /* The request may have completed meanwhile. */
1540 if (blk_mq_request_completed(rq)) {
1541 spin_unlock(&nq->poll_lock);
1542 return BLK_EH_DONE;
1543 }
1544 list_del_init(&rq->queuelist);
1545 spin_unlock(&nq->poll_lock);
1546 }
1547
1548 pr_info("rq %p timed out\n", rq);
1549
1550 /*
1551 * If the device is marked as blocking (i.e. memory backed or zoned
1552 * device), the submission path may be blocked waiting for resources
1553 * and cause real timeouts. For these real timeouts, the submission
1554 * path will complete the request using blk_mq_complete_request().
1555 * Only fake timeouts need to execute blk_mq_complete_request() here.
1556 */
1557 cmd->error = BLK_STS_TIMEOUT;
3e3876d3 1558 if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL)
de3510e5 1559 blk_mq_complete_request(rq);
0df0bb08 1560 return BLK_EH_DONE;
1561}
1562
fc17b653 1563static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
b6402014 1564 const struct blk_mq_queue_data *bd)
f2298c04 1565{
1566 struct request *rq = bd->rq;
1567 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
2984c868 1568 struct nullb_queue *nq = hctx->driver_data;
1569 sector_t nr_sectors = blk_rq_sectors(rq);
1570 sector_t sector = blk_rq_pos(rq);
0a593fbb 1571 const bool is_poll = hctx->type == HCTX_TYPE_POLL;
f2298c04 1572
1573 might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
1574
0a593fbb 1575 if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
1576 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1577 cmd->timer.function = null_cmd_timer_expired;
1578 }
ff770422 1579 cmd->error = BLK_STS_OK;
2984c868 1580 cmd->nq = nq;
1581 cmd->fake_timeout = should_timeout_request(rq) ||
1582 blk_should_fake_timeout(rq->q);
f2298c04 1583
b6402014 1584 if (should_requeue_request(rq)) {
1585 /*
1586 * Alternate between hitting the core BUSY path, and the
1587 * driver driven requeue path
1588 */
1589 nq->requeue_selection++;
1590 if (nq->requeue_selection & 1)
1591 return BLK_STS_RESOURCE;
1592 blk_mq_requeue_request(rq, true);
1593 return BLK_STS_OK;
24941b90 1594 }
0a593fbb 1595
1596 if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
1597 blk_status_t sts = null_handle_throttled(cmd);
1598
1599 if (sts != BLK_STS_OK)
1600 return sts;
1601 }
1602
1603 blk_mq_start_request(rq);
1604
1605 if (is_poll) {
1606 spin_lock(&nq->poll_lock);
b6402014 1607 list_add_tail(&rq->queuelist, &nq->poll_list);
1608 spin_unlock(&nq->poll_lock);
1609 return BLK_STS_OK;
1610 }
de3510e5 1611 if (cmd->fake_timeout)
24941b90 1612 return BLK_STS_OK;
93b57046 1613
1614 null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
1615 return BLK_STS_OK;
1616}
1617
1618static void null_queue_rqs(struct request **rqlist)
1619{
1620 struct request *requeue_list = NULL;
1621 struct request **requeue_lastp = &requeue_list;
1622 struct blk_mq_queue_data bd = { };
1623 blk_status_t ret;
1624
1625 do {
1626 struct request *rq = rq_list_pop(rqlist);
1627
1628 bd.rq = rq;
1629 ret = null_queue_rq(rq->mq_hctx, &bd);
1630 if (ret != BLK_STS_OK)
1631 rq_list_add_tail(&requeue_lastp, rq);
1632 } while (!rq_list_empty(*rqlist));
1633
1634 *rqlist = requeue_list;
1635}
1636
1637static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
1638{
78b10be2 1639 nq->dev = nullb->dev;
1640 INIT_LIST_HEAD(&nq->poll_list);
1641 spin_lock_init(&nq->poll_lock);
1642}
1643
1644static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
1645 unsigned int hctx_idx)
1646{
1647 struct nullb *nullb = hctx->queue->queuedata;
1648 struct nullb_queue *nq;
1649
bb4c19e0 1650 if (should_init_hctx_fail(nullb->dev))
596444e7 1651 return -EFAULT;
596444e7 1652
1653 nq = &nullb->queues[hctx_idx];
1654 hctx->driver_data = nq;
1655 null_init_queue(nullb, nq);
1656
1657 return 0;
1658}
1659
1660static const struct blk_mq_ops null_mq_ops = {
1661 .queue_rq = null_queue_rq,
d78bfa13 1662 .queue_rqs = null_queue_rqs,
1663 .complete = null_complete_rq,
1664 .timeout = null_timeout_rq,
1665 .poll = null_poll,
1666 .map_queues = null_map_queues,
78b10be2 1667 .init_hctx = null_init_hctx,
1668};
1669
1670static void null_del_dev(struct nullb *nullb)
1671{
1672 struct nullb_device *dev;
1673
1674 if (!nullb)
1675 return;
1676
1677 dev = nullb->dev;
2984c868 1678
95931a24 1679 ida_free(&nullb_indexes, nullb->index);
94bc02e3 1680
1681 list_del_init(&nullb->list);
1682
74ede5af 1683 del_gendisk(nullb->disk);
1684
1685 if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
1686 hrtimer_cancel(&nullb->bw_timer);
1687 atomic_long_set(&nullb->cur_bytes, LONG_MAX);
8b631f9c 1688 blk_mq_start_stopped_hw_queues(nullb->q, true);
1689 }
1690
8b9ab626 1691 put_disk(nullb->disk);
8b631f9c 1692 if (nullb->tag_set == &nullb->__tag_set)
82f402fe 1693 blk_mq_free_tag_set(nullb->tag_set);
8b631f9c 1694 kfree(nullb->queues);
1695 if (null_cache_active(nullb))
1696 null_free_device_storage(nullb->dev, true);
9ae2d0aa 1697 kfree(nullb);
2984c868 1698 dev->nullb = NULL;
1699}
1700
e440626b 1701static void null_config_discard(struct nullb *nullb, struct queue_limits *lim)
1702{
1703 if (nullb->dev->discard == false)
1704 return;
1592cd15 1705
1706 if (!nullb->dev->memory_backed) {
1707 nullb->dev->discard = false;
1708 pr_info("discard option is ignored without memory backing\n");
1709 return;
1710 }
1711
1712 if (nullb->dev->zoned) {
1713 nullb->dev->discard = false;
1714 pr_info("discard option is ignored in zoned mode\n");
1715 return;
1716 }
1717
e440626b 1718 lim->max_hw_discard_sectors = UINT_MAX >> 9;
9ae2d0aa
MB
1719}
1720
8b631f9c 1721static const struct block_device_operations null_ops = {
c62b37d9 1722 .owner = THIS_MODULE,
c62b37d9
CH
1723 .report_zones = null_report_zones,
1724};
1725
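/*
 * Allocate the per-queue bookkeeping: one nullb_queue per possible
 * CPU, plus one per configured poll queue.
 */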
f2298c04
JA
1726static int setup_queues(struct nullb *nullb)
1727{
0a593fbb
JA
1728 int nqueues = nr_cpu_ids;
1729
1730 if (g_poll_queues)
1731 nqueues += g_poll_queues;
1732
1733 nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue),
6396bb22 1734 GFP_KERNEL);
f2298c04 1735 if (!nullb->queues)
2d263a78 1736 return -ENOMEM;
f2298c04 1737
2d263a78
MB
1738 return 0;
1739}
1740
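/*
 * Fill in the fields common to the shared and per-device tag sets and
 * allocate the set. Poll queues get their own map, so nr_maps grows
 * from 1 (default) to 3 (default + read + poll) when polling is
 * enabled.
 */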
72ca2876 1741static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
82f402fe
JA
1742{
1743 set->ops = &null_mq_ops;
72ca2876 1744 set->cmd_size = sizeof(struct nullb_cmd);
e32b0855 1745 set->timeout = 5 * HZ;
72ca2876 1746 set->nr_maps = 1;
7012eef5
VF
1747 if (poll_queues) {
1748 set->nr_hw_queues += poll_queues;
72ca2876 1749 set->nr_maps += 2;
7012eef5 1750 }
82f402fe
JA
1751 return blk_mq_alloc_tag_set(set);
1752}
1753
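/*
 * Lazily initialise the module-wide tag set used by devices created
 * with shared tags. The global module parameters provide the queue
 * count, depth and flags; tag_set.ops doubles as the "already
 * initialised" marker.
 */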
72ca2876
CH
1754static int null_init_global_tag_set(void)
1755{
1756 int error;
1757
1758 if (tag_set.ops)
1759 return 0;
1760
1761 tag_set.nr_hw_queues = g_submit_queues;
1762 tag_set.queue_depth = g_hw_queue_depth;
1763 tag_set.numa_node = g_home_node;
1764 tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
1765 if (g_no_sched)
1766 tag_set.flags |= BLK_MQ_F_NO_SCHED;
1767 if (g_shared_tag_bitmap)
1768 tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
1769 if (g_blocking)
1770 tag_set.flags |= BLK_MQ_F_BLOCKING;
1771
1772 error = null_init_tag_set(&tag_set, g_poll_queues);
1773 if (error)
1774 tag_set.ops = NULL;
1775 return error;
1776}
1777
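/*
 * Pick the tag set for a new device: the global shared set when
 * shared_tags is enabled, otherwise the device's private __tag_set
 * configured from its own attributes.
 */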
1778static int null_setup_tagset(struct nullb *nullb)
1779{
1780 if (nullb->dev->shared_tags) {
1781 nullb->tag_set = &tag_set;
1782 return null_init_global_tag_set();
1783 }
1784
1785 nullb->tag_set = &nullb->__tag_set;
1786 nullb->tag_set->driver_data = nullb;
1787 nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
1788 nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
1789 nullb->tag_set->numa_node = nullb->dev->home_node;
1790 nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
1791 if (nullb->dev->no_sched)
1792 nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED;
1793 if (nullb->dev->shared_tag_bitmap)
1794 nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
1795 if (nullb->dev->blocking)
1796 nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
1797 return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
1798}
1799
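/*
 * Sanity-check and clamp a device configuration before it is turned
 * into a real disk: reject the removed legacy request mode, fall back
 * from BIO mode to blk-mq, bound the block size, queue counts, cache
 * size and bandwidth limit, and require a power-of-two zone size for
 * zoned devices.
 */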
5c4bd1f4 1800static int null_validate_conf(struct nullb_device *dev)
cedcafad 1801{
63f8793e
CK
1802 if (dev->queue_mode == NULL_Q_RQ) {
1803 pr_err("legacy IO path is no longer available\n");
1804 return -EINVAL;
1805 }
8b631f9c
CH
1806 if (dev->queue_mode == NULL_Q_BIO) {
1807 pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
1808 dev->queue_mode = NULL_Q_MQ;
1809 }
63f8793e 1810
cedcafad
SL
1811 dev->blocksize = round_down(dev->blocksize, 512);
1812 dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
cedcafad 1813
8b631f9c 1814 if (dev->use_per_node_hctx) {
cedcafad
SL
1815 if (dev->submit_queues != nr_online_nodes)
1816 dev->submit_queues = nr_online_nodes;
1817 } else if (dev->submit_queues > nr_cpu_ids)
1818 dev->submit_queues = nr_cpu_ids;
1819 else if (dev->submit_queues == 0)
1820 dev->submit_queues = 1;
15dfc662
SK
1821 dev->prev_submit_queues = dev->submit_queues;
1822
1823 if (dev->poll_queues > g_poll_queues)
1824 dev->poll_queues = g_poll_queues;
15dfc662 1825 dev->prev_poll_queues = dev->poll_queues;
cedcafad 1826 dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
5bcd0e0c
SL
1827
1828 	/* Memory-backed devices allocate pages in the I/O path, so force blocking submission */
1829 if (dev->memory_backed)
1830 dev->blocking = true;
deb78b41
SL
1831 	else /* a cache is meaningless without memory backing */
1832 dev->cache_size = 0;
1833 dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
1834 dev->cache_size);
eff2c4f1 1835 dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
5c4bd1f4
DLM
1836
1837 if (dev->zoned &&
1838 (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
1839 pr_err("zone_size must be power-of-two\n");
1840 return -EINVAL;
1841 }
1842
1843 return 0;
cedcafad
SL
1844}
1845
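/*
 * Parse the timeout/requeue/init_hctx fault-injection strings passed
 * as module parameters. Only built when
 * CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION is enabled; otherwise
 * null_setup_fault() is a no-op that reports success.
 */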
33f782c4 1846#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
24941b90
JA
1847static bool __null_setup_fault(struct fault_attr *attr, char *str)
1848{
1849 if (!str[0])
93b57046
JA
1850 return true;
1851
24941b90 1852 if (!setup_fault_attr(attr, str))
93b57046
JA
1853 return false;
1854
24941b90
JA
1855 attr->verbose = 0;
1856 return true;
1857}
1858#endif
1859
1860static bool null_setup_fault(void)
1861{
1862#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1863 if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
1864 return false;
1865 if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
1866 return false;
596444e7
BVA
1867 if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
1868 return false;
33f782c4 1869#endif
93b57046
JA
1870 return true;
1871}
1872
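/*
 * Create and register one null_blk disk from a validated nullb_device:
 * allocate the nullb, its queues and tag set, build the queue limits
 * (block size, discard, virt boundary, zones), allocate the gendisk,
 * optionally arm bandwidth throttling and the write-back cache, pick a
 * name (the configfs directory name if the device came from configfs,
 * nullb<index> otherwise) and finally add_disk().
 *
 * Illustrative configfs usage (a sketch; the attribute names assume
 * the standard null_blk configfs layout):
 *
 *	mkdir /sys/kernel/config/nullb/foo
 *	echo 1 > /sys/kernel/config/nullb/foo/power
 *
 * which reaches this function through the configfs "power" attribute
 * and, because the directory name is used as the disk name, creates
 * /dev/foo.
 */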
2984c868 1873static int null_add_dev(struct nullb_device *dev)
9ae2d0aa 1874{
e440626b
CH
1875 struct queue_limits lim = {
1876 .logical_block_size = dev->blocksize,
1877 .physical_block_size = dev->blocksize,
1878 .max_hw_sectors = dev->max_sectors,
1879 };
1880
9ae2d0aa 1881 struct nullb *nullb;
dc501dc0 1882 int rv;
f2298c04 1883
5c4bd1f4
DLM
1884 rv = null_validate_conf(dev);
1885 if (rv)
1886 return rv;
cedcafad 1887
2984c868 1888 nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
dc501dc0
RE
1889 if (!nullb) {
1890 rv = -ENOMEM;
24d2f903 1891 goto out;
dc501dc0 1892 }
2984c868
SL
1893 nullb->dev = dev;
1894 dev->nullb = nullb;
f2298c04
JA
1895
1896 spin_lock_init(&nullb->lock);
1897
dc501dc0
RE
1898 rv = setup_queues(nullb);
1899 if (rv)
24d2f903 1900 goto out_free_nullb;
f2298c04 1901
72ca2876 1902 rv = null_setup_tagset(nullb);
8b631f9c
CH
1903 if (rv)
1904 goto out_cleanup_queues;
132226b3 1905
e440626b
CH
1906 if (dev->virt_boundary)
1907 lim.virt_boundary_mask = PAGE_SIZE - 1;
1908 null_config_discard(nullb, &lim);
1909 if (dev->zoned) {
1910 rv = null_init_zoned_dev(dev, &lim);
1911 if (rv)
1912 goto out_cleanup_tags;
1913 }
1914
1915 nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
8b631f9c
CH
1916 if (IS_ERR(nullb->disk)) {
1917 rv = PTR_ERR(nullb->disk);
e440626b 1918 goto out_cleanup_zone;
f2298c04 1919 }
8b631f9c 1920 nullb->q = nullb->disk->queue;
f2298c04 1921
eff2c4f1
SL
1922 if (dev->mbps) {
1923 set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
1924 nullb_setup_bwtimer(nullb);
1925 }
1926
deb78b41
SL
1927 if (dev->cache_size > 0) {
1928 set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
f4f84586 1929 blk_queue_write_cache(nullb->q, true, dev->fua);
deb78b41
SL
1930 }
1931
f2298c04 1932 nullb->q->queuedata = nullb;
8b904b5b 1933 blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
f2298c04 1934
f2298c04 1935 mutex_lock(&lock);
95931a24 1936 rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
ee452a8d
DC
1937 if (rv < 0) {
1938 mutex_unlock(&lock);
e440626b 1939 goto out_cleanup_disk;
ee452a8d
DC
1940 }
1941 nullb->index = rv;
1942 dev->index = rv;
f2298c04
JA
1943 mutex_unlock(&lock);
1944
bb4c19e0 1945 if (config_item_name(&dev->group.cg_item)) {
49c3b926
DLM
1946 /* Use configfs dir name as the device name */
1947 snprintf(nullb->disk_name, sizeof(nullb->disk_name),
bb4c19e0 1948 "%s", config_item_name(&dev->group.cg_item));
49c3b926
DLM
1949 } else {
1950 sprintf(nullb->disk_name, "nullb%d", nullb->index);
1951 }
b2b7e001 1952
0a39e550
CH
1953 set_capacity(nullb->disk,
1954 ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT);
1955 nullb->disk->major = null_major;
1956 nullb->disk->first_minor = nullb->index;
1957 nullb->disk->minors = 1;
1958 nullb->disk->fops = &null_ops;
1959 nullb->disk->private_data = nullb;
1960 strscpy_pad(nullb->disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
1961
1962 if (nullb->dev->zoned) {
1963 rv = null_register_zoned_dev(nullb);
1964 if (rv)
1965 goto out_ida_free;
1966 }
1967
1968 rv = add_disk(nullb->disk);
9ae2d0aa 1969 if (rv)
ee452a8d 1970 goto out_ida_free;
a514379b
MB
1971
1972 mutex_lock(&lock);
1973 list_add_tail(&nullb->list, &nullb_list);
1974 mutex_unlock(&lock);
3681c85d 1975
db060f54
DLM
1976 pr_info("disk %s created\n", nullb->disk_name);
1977
f2298c04 1978 return 0;
ee452a8d
DC
1979
1980out_ida_free:
1981 ida_free(&nullb_indexes, nullb->index);
ca4b2a01 1982out_cleanup_zone:
d205bde7 1983 null_free_zoned_dev(dev);
132226b3 1984out_cleanup_disk:
8b9ab626 1985 put_disk(nullb->disk);
24d2f903 1986out_cleanup_tags:
8b631f9c 1987 if (nullb->tag_set == &nullb->__tag_set)
82f402fe 1988 blk_mq_free_tag_set(nullb->tag_set);
24d2f903 1989out_cleanup_queues:
8b631f9c 1990 kfree(nullb->queues);
24d2f903
CH
1991out_free_nullb:
1992 kfree(nullb);
2004bfde 1993 dev->nullb = NULL;
24d2f903 1994out:
dc501dc0 1995 return rv;
f2298c04
JA
1996}
1997
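/*
 * Find a registered nullb by its disk name, taking the global lock to
 * walk nullb_list.
 */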
49c3b926
DLM
1998static struct nullb *null_find_dev_by_name(const char *name)
1999{
2000 struct nullb *nullb = NULL, *nb;
2001
2002 mutex_lock(&lock);
2003 list_for_each_entry(nb, &nullb_list, list) {
2004 if (strcmp(nb->disk_name, name) == 0) {
2005 nullb = nb;
2006 break;
2007 }
2008 }
2009 mutex_unlock(&lock);
2010
2011 return nullb;
2012}
2013
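/*
 * Helpers for module load/unload: null_create_dev() allocates a device
 * with default attributes and registers it, null_destroy_dev() undoes
 * that and releases any memory-backed storage.
 */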
b3a0a73e
DLM
2014static int null_create_dev(void)
2015{
2016 struct nullb_device *dev;
2017 int ret;
2018
2019 dev = null_alloc_dev();
2020 if (!dev)
2021 return -ENOMEM;
2022
2023 ret = null_add_dev(dev);
2024 if (ret) {
2025 null_free_dev(dev);
2026 return ret;
2027 }
2028
2029 return 0;
2030}
2031
2032static void null_destroy_dev(struct nullb *nullb)
2033{
2034 struct nullb_device *dev = nullb->dev;
2035
2036 null_del_dev(nullb);
8cfb9819 2037 null_free_device_storage(dev, false);
b3a0a73e
DLM
2038 null_free_dev(dev);
2039}
2040
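/*
 * Module init: validate the global parameters, register the configfs
 * subsystem and the "nullb" block major, then create nr_devices disks
 * with the default configuration.
 *
 * Illustrative usage (a sketch; only parameters referenced in this
 * file are shown):
 *
 *	modprobe null_blk nr_devices=2 submit_queues=4
 */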
f2298c04
JA
2041static int __init null_init(void)
2042{
af096e22 2043 int ret = 0;
f2298c04 2044 unsigned int i;
af096e22 2045 struct nullb *nullb;
f2298c04 2046
2984c868 2047 if (g_bs > PAGE_SIZE) {
9c7eddf1
AA
2048 pr_warn("invalid block size\n");
2049 		pr_warn("defaulting block size to %lu\n", PAGE_SIZE);
2984c868 2050 g_bs = PAGE_SIZE;
9967d8ac 2051 }
f2298c04 2052
7ff684a6 2053 if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
9c7eddf1 2054 pr_err("invalid home_node value\n");
7ff684a6
JP
2055 g_home_node = NUMA_NO_NODE;
2056 }
2057
bb4c19e0
AM
2058 if (!null_setup_fault())
2059 return -EINVAL;
2060
e50b1e32 2061 if (g_queue_mode == NULL_Q_RQ) {
525323d2 2062 pr_err("legacy IO path is no longer available\n");
e50b1e32
JA
2063 return -EINVAL;
2064 }
525323d2 2065
8b631f9c 2066 if (g_use_per_node_hctx) {
2984c868 2067 if (g_submit_queues != nr_online_nodes) {
9c7eddf1 2068 pr_warn("submit_queues param is set to %u.\n",
525323d2 2069 nr_online_nodes);
2984c868 2070 g_submit_queues = nr_online_nodes;
fc1bc354 2071 }
525323d2 2072 } else if (g_submit_queues > nr_cpu_ids) {
2984c868 2073 g_submit_queues = nr_cpu_ids;
525323d2 2074 } else if (g_submit_queues <= 0) {
2984c868 2075 g_submit_queues = 1;
525323d2 2076 }
f2298c04 2077
3bf2bd20
SL
2078 config_group_init(&nullb_subsys.su_group);
2079 mutex_init(&nullb_subsys.su_mutex);
2080
2081 ret = configfs_register_subsystem(&nullb_subsys);
2082 if (ret)
14509b74 2083 return ret;
3bf2bd20 2084
f2298c04
JA
2085 mutex_init(&lock);
2086
f2298c04 2087 null_major = register_blkdev(0, "nullb");
db2d153d
MG
2088 if (null_major < 0) {
2089 ret = null_major;
3bf2bd20 2090 goto err_conf;
db2d153d 2091 }
f2298c04
JA
2092
2093 for (i = 0; i < nr_devices; i++) {
b3a0a73e
DLM
2094 ret = null_create_dev();
2095 if (ret)
af096e22 2096 goto err_dev;
f2298c04
JA
2097 }
2098
9c7eddf1 2099 pr_info("module loaded\n");
f2298c04 2100 return 0;
af096e22
MH
2101
2102err_dev:
2103 while (!list_empty(&nullb_list)) {
2104 nullb = list_entry(nullb_list.next, struct nullb, list);
b3a0a73e 2105 null_destroy_dev(nullb);
af096e22 2106 }
af096e22 2107 unregister_blkdev(null_major, "nullb");
3bf2bd20
SL
2108err_conf:
2109 configfs_unregister_subsystem(&nullb_subsys);
af096e22 2110 return ret;
f2298c04
JA
2111}
2112
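/*
 * Module exit: tear down what null_init() set up (configfs subsystem,
 * block major, remaining devices, the shared tag set) and finally
 * destroy the global mutex once nothing can take it any more.
 */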
2113static void __exit null_exit(void)
2114{
2115 struct nullb *nullb;
2116
3bf2bd20
SL
2117 configfs_unregister_subsystem(&nullb_subsys);
2118
f2298c04
JA
2119 unregister_blkdev(null_major, "nullb");
2120
2121 mutex_lock(&lock);
2122 while (!list_empty(&nullb_list)) {
2123 nullb = list_entry(nullb_list.next, struct nullb, list);
b3a0a73e 2124 null_destroy_dev(nullb);
f2298c04
JA
2125 }
2126 mutex_unlock(&lock);
6bb9535b 2127
14509b74 2128 if (tag_set.ops)
82f402fe 2129 blk_mq_free_tag_set(&tag_set);
07d1b998
ZY
2130
2131 mutex_destroy(&lock);
f2298c04
JA
2132}
2133
2134module_init(null_init);
2135module_exit(null_exit);
2136
231b3db1 2137MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
f2298c04 2138MODULE_LICENSE("GPL");