null_blk: refactor tag_set setup
[linux-block.git] / drivers / block / null_blk / main.c
09c434b8 1// SPDX-License-Identifier: GPL-2.0-only
3bf2bd20
SL
2/*
3 * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
4 * Shaohua Li <shli@fb.com>
5 */
f2298c04 6#include <linux/module.h>
fc1bc354 7
f2298c04
JA
8#include <linux/moduleparam.h>
9#include <linux/sched.h>
10#include <linux/fs.h>
f2298c04 11#include <linux/init.h>
6dad38d3 12#include "null_blk.h"
f2298c04 13
db060f54
DLM
14#undef pr_fmt
15#define pr_fmt(fmt) "null_blk: " fmt
16
5bcd0e0c
SL
17#define FREE_BATCH 16
18
eff2c4f1
SL
19#define TICKS_PER_SEC 50ULL
20#define TIMER_INTERVAL (NSEC_PER_SEC / TICKS_PER_SEC)
21
33f782c4 22#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
93b57046 23static DECLARE_FAULT_ATTR(null_timeout_attr);
24941b90 24static DECLARE_FAULT_ATTR(null_requeue_attr);
596444e7 25static DECLARE_FAULT_ATTR(null_init_hctx_attr);
33f782c4 26#endif
93b57046 27
eff2c4f1
SL
28static inline u64 mb_per_tick(int mbps)
29{
30 return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
31}
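/*
 * Illustrative arithmetic: with TICKS_PER_SEC = 50 the bandwidth timer fires
 * every 20 ms, and each tick replenishes mb_per_tick(mbps) bytes of I/O
 * budget. E.g. for mbps = 100 that is (1 << 20) / 50 * 100, roughly 2 MiB
 * per tick.
 */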
f2298c04 32
3bf2bd20
SL
33/*
34 * Status flags for nullb_device.
35 *
36 * CONFIGURED: Device has been configured and turned on. Cannot reconfigure.
37 * UP: Device is currently on and visible in userspace.
eff2c4f1 38 * THROTTLED: Device is being throttled.
deb78b41 39 * CACHE: Device is using a write-back cache.
3bf2bd20
SL
40 */
41enum nullb_device_flags {
42 NULLB_DEV_FL_CONFIGURED = 0,
43 NULLB_DEV_FL_UP = 1,
eff2c4f1 44 NULLB_DEV_FL_THROTTLED = 2,
deb78b41 45 NULLB_DEV_FL_CACHE = 3,
3bf2bd20
SL
46};
47
66231ad3 48#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2)
5bcd0e0c
SL
49/*
50 * nullb_page is a page in memory for nullb devices.
51 *
52 * @page: The page holding the data.
53 * @bitmap: The bitmap represents which sector in the page has data.
54 * Each bit represents one block size. For example, sector 8
55 * will use the 7th bit
deb78b41
SL
 56 * The highest 2 bits of the bitmap are for special purposes. LOCK means the
 57 * cache page is being flushed to storage. FREE means the cache page is freed
 58 * and should be skipped when flushing to storage. Please see
 59 * null_make_cache_space().
5bcd0e0c
SL
60 */
61struct nullb_page {
62 struct page *page;
66231ad3 63 DECLARE_BITMAP(bitmap, MAP_SZ);
5bcd0e0c 64};
66231ad3
ML
65#define NULLB_PAGE_LOCK (MAP_SZ - 1)
66#define NULLB_PAGE_FREE (MAP_SZ - 2)
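/*
 * Illustrative layout, assuming 4 KiB pages and a 512 B block size:
 * MAP_SZ = (4096 >> 9) + 2 = 10, so bits 0..7 track the eight sectors of
 * the page, bit 8 is NULLB_PAGE_FREE and bit 9 is NULLB_PAGE_LOCK.
 */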
5bcd0e0c 67
f2298c04
JA
68static LIST_HEAD(nullb_list);
69static struct mutex lock;
70static int null_major;
94bc02e3 71static DEFINE_IDA(nullb_indexes);
82f402fe 72static struct blk_mq_tag_set tag_set;
f2298c04 73
f2298c04
JA
74enum {
75 NULL_IRQ_NONE = 0,
76 NULL_IRQ_SOFTIRQ = 1,
77 NULL_IRQ_TIMER = 2,
ce2c350b 78};
f2298c04 79
cee1b215
MG
80static bool g_virt_boundary = false;
81module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
82MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
83
b3cffc38 84static int g_no_sched;
5657a819 85module_param_named(no_sched, g_no_sched, int, 0444);
b3cffc38 86MODULE_PARM_DESC(no_sched, "No io scheduler");
87
2984c868 88static int g_submit_queues = 1;
5657a819 89module_param_named(submit_queues, g_submit_queues, int, 0444);
f2298c04
JA
90MODULE_PARM_DESC(submit_queues, "Number of submission queues");
91
0a593fbb
JA
92static int g_poll_queues = 1;
93module_param_named(poll_queues, g_poll_queues, int, 0444);
94MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues");
95
2984c868 96static int g_home_node = NUMA_NO_NODE;
5657a819 97module_param_named(home_node, g_home_node, int, 0444);
f2298c04
JA
98MODULE_PARM_DESC(home_node, "Home node for the device");
99
33f782c4 100#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
290df92a
DZ
101/*
102 * For more details about fault injection, please refer to
103 * Documentation/fault-injection/fault-injection.rst.
104 */
93b57046 105static char g_timeout_str[80];
5657a819 106module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
290df92a 107MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
24941b90
JA
108
109static char g_requeue_str[80];
5657a819 110module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
290df92a 111MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
596444e7
BVA
112
113static char g_init_hctx_str[80];
114module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
290df92a 115MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
33f782c4 116#endif
93b57046 117
8b631f9c
CH
118/*
119 * Historic queue modes.
120 *
 121 * These days nothing but NULL_Q_MQ is actually supported, but we keep the
 122 * enum around for error reporting.
123 */
124enum {
125 NULL_Q_BIO = 0,
126 NULL_Q_RQ = 1,
127 NULL_Q_MQ = 2,
128};
129
2984c868 130static int g_queue_mode = NULL_Q_MQ;
709c8667
MB
131
132static int null_param_store_val(const char *str, int *val, int min, int max)
133{
134 int ret, new_val;
135
136 ret = kstrtoint(str, 10, &new_val);
137 if (ret)
138 return -EINVAL;
139
140 if (new_val < min || new_val > max)
141 return -EINVAL;
142
143 *val = new_val;
144 return 0;
145}
146
147static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
148{
2984c868 149 return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
709c8667
MB
150}
151
9c27847d 152static const struct kernel_param_ops null_queue_mode_param_ops = {
709c8667
MB
153 .set = null_set_queue_mode,
154 .get = param_get_int,
155};
156
5657a819 157device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
54ae81cd 158MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
f2298c04 159
2984c868 160static int g_gb = 250;
5657a819 161module_param_named(gb, g_gb, int, 0444);
f2298c04
JA
162MODULE_PARM_DESC(gb, "Size in GB");
163
2984c868 164static int g_bs = 512;
5657a819 165module_param_named(bs, g_bs, int, 0444);
f2298c04
JA
166MODULE_PARM_DESC(bs, "Block size (in bytes)");
167
ea17fd35
DLM
168static int g_max_sectors;
169module_param_named(max_sectors, g_max_sectors, int, 0444);
170MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
171
f7c4ce89 172static unsigned int nr_devices = 1;
701dfc42 173module_param(nr_devices, uint, 0444);
f2298c04
JA
174MODULE_PARM_DESC(nr_devices, "Number of devices to register");
175
2984c868 176static bool g_blocking;
5657a819 177module_param_named(blocking, g_blocking, bool, 0444);
db5bcf87
JA
178MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
179
14509b74
SK
180static bool g_shared_tags;
181module_param_named(shared_tags, g_shared_tags, bool, 0444);
82f402fe
JA
182MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
183
0905053b
JG
184static bool g_shared_tag_bitmap;
185module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
186MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
187
2984c868 188static int g_irqmode = NULL_IRQ_SOFTIRQ;
709c8667
MB
189
190static int null_set_irqmode(const char *str, const struct kernel_param *kp)
191{
2984c868 192 return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
709c8667
MB
193 NULL_IRQ_TIMER);
194}
195
9c27847d 196static const struct kernel_param_ops null_irqmode_param_ops = {
709c8667
MB
197 .set = null_set_irqmode,
198 .get = param_get_int,
199};
200
5657a819 201device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
f2298c04
JA
202MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
203
2984c868 204static unsigned long g_completion_nsec = 10000;
5657a819 205module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
f2298c04
JA
206MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
207
2984c868 208static int g_hw_queue_depth = 64;
5657a819 209module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
f2298c04
JA
210MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
211
2984c868 212static bool g_use_per_node_hctx;
5657a819 213module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
20005244 214MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
f2298c04 215
058efe00
VF
216static bool g_memory_backed;
217module_param_named(memory_backed, g_memory_backed, bool, 0444);
218MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");
219
220static bool g_discard;
221module_param_named(discard, g_discard, bool, 0444);
222MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");
223
224static unsigned long g_cache_size;
225module_param_named(cache_size, g_cache_size, ulong, 0444);
 226MODULE_PARM_DESC(cache_size, "Cache size in MiB for memory-backed device. Default: 0 (none)");
227
228static unsigned int g_mbps;
229module_param_named(mbps, g_mbps, uint, 0444);
230MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
231
ca4b2a01
MB
232static bool g_zoned;
233module_param_named(zoned, g_zoned, bool, S_IRUGO);
 234MODULE_PARM_DESC(zoned, "Make the device a host-managed zoned block device. Default: false");
235
236static unsigned long g_zone_size = 256;
237module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
 238MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be a power of two. Default: 256");
239
089565fb
AR
240static unsigned long g_zone_capacity;
241module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
242MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
243
ea2c18e1
MS
244static unsigned int g_zone_nr_conv;
245module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
246MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
247
dc4d137e
NC
248static unsigned int g_zone_max_open;
249module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
250MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
251
252static unsigned int g_zone_max_active;
253module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
254MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
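/*
 * Example invocation (hypothetical values) combining some of the module
 * parameters above:
 *
 *   modprobe null_blk nr_devices=2 gb=4 bs=4096 memory_backed=1 mbps=100
 *
 * creates two 4 GB memory-backed devices with a 4 KiB block size, each
 * throttled to roughly 100 MiB/s.
 */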
255
3bf2bd20
SL
256static struct nullb_device *null_alloc_dev(void);
257static void null_free_dev(struct nullb_device *dev);
cedcafad
SL
258static void null_del_dev(struct nullb *nullb);
259static int null_add_dev(struct nullb_device *dev);
49c3b926 260static struct nullb *null_find_dev_by_name(const char *name);
deb78b41 261static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
3bf2bd20
SL
262
263static inline struct nullb_device *to_nullb_device(struct config_item *item)
264{
bb4c19e0 265 return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
3bf2bd20
SL
266}
267
268static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
269{
270 return snprintf(page, PAGE_SIZE, "%u\n", val);
271}
272
273static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
274 char *page)
275{
276 return snprintf(page, PAGE_SIZE, "%lu\n", val);
277}
278
279static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
280{
281 return snprintf(page, PAGE_SIZE, "%u\n", val);
282}
283
284static ssize_t nullb_device_uint_attr_store(unsigned int *val,
285 const char *page, size_t count)
286{
287 unsigned int tmp;
288 int result;
289
290 result = kstrtouint(page, 0, &tmp);
45919fbf 291 if (result < 0)
3bf2bd20
SL
292 return result;
293
294 *val = tmp;
295 return count;
296}
297
298static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
299 const char *page, size_t count)
300{
301 int result;
302 unsigned long tmp;
303
304 result = kstrtoul(page, 0, &tmp);
45919fbf 305 if (result < 0)
3bf2bd20
SL
306 return result;
307
308 *val = tmp;
309 return count;
310}
311
312static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
313 size_t count)
314{
315 bool tmp;
316 int result;
317
318 result = kstrtobool(page, &tmp);
45919fbf 319 if (result < 0)
3bf2bd20
SL
320 return result;
321
322 *val = tmp;
323 return count;
324}
325
326/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
ca0a95a6
AM
327#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
328static ssize_t \
329nullb_device_##NAME##_show(struct config_item *item, char *page) \
330{ \
331 return nullb_device_##TYPE##_attr_show( \
332 to_nullb_device(item)->NAME, page); \
333} \
334static ssize_t \
335nullb_device_##NAME##_store(struct config_item *item, const char *page, \
336 size_t count) \
337{ \
338 int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
339 struct nullb_device *dev = to_nullb_device(item); \
b9853b4d 340 TYPE new_value = 0; \
ca0a95a6
AM
341 int ret; \
342 \
343 ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
344 if (ret < 0) \
345 return ret; \
346 if (apply_fn) \
347 ret = apply_fn(dev, new_value); \
348 else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
349 ret = -EBUSY; \
350 if (ret < 0) \
351 return ret; \
352 dev->NAME = new_value; \
353 return count; \
354} \
3bf2bd20
SL
355CONFIGFS_ATTR(nullb_device_, NAME);
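/*
 * For instance, NULLB_DEVICE_ATTR(size, ulong, NULL) expands to
 * nullb_device_size_show() and nullb_device_size_store() plus the matching
 * configfs attribute, so "size" stays writable until the device has been
 * configured (NULLB_DEV_FL_CONFIGURED) and rejects writes with -EBUSY after.
 */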
356
15dfc662
SK
357static int nullb_update_nr_hw_queues(struct nullb_device *dev,
358 unsigned int submit_queues,
359 unsigned int poll_queues)
360
45919fbf 361{
45919fbf 362 struct blk_mq_tag_set *set;
15dfc662 363 int ret, nr_hw_queues;
45919fbf 364
15dfc662 365 if (!dev->nullb)
45919fbf
BVA
366 return 0;
367
15dfc662 368 /*
2bfdbe8b 369 * Make sure at least one submit queue exists.
15dfc662 370 */
2bfdbe8b 371 if (!submit_queues)
15dfc662
SK
372 return -EINVAL;
373
78b10be2
BVA
374 /*
375 * Make sure that null_init_hctx() does not access nullb->queues[] past
376 * the end of that array.
377 */
15dfc662 378 if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues)
78b10be2 379 return -EINVAL;
15dfc662
SK
380
381 /*
382 * Keep previous and new queue numbers in nullb_device for reference in
 383 * the callback function null_map_queues().
384 */
385 dev->prev_submit_queues = dev->submit_queues;
386 dev->prev_poll_queues = dev->poll_queues;
387 dev->submit_queues = submit_queues;
388 dev->poll_queues = poll_queues;
389
390 set = dev->nullb->tag_set;
391 nr_hw_queues = submit_queues + poll_queues;
392 blk_mq_update_nr_hw_queues(set, nr_hw_queues);
393 ret = set->nr_hw_queues == nr_hw_queues ? 0 : -ENOMEM;
394
395 if (ret) {
396 /* on error, revert the queue numbers */
397 dev->submit_queues = dev->prev_submit_queues;
398 dev->poll_queues = dev->prev_poll_queues;
399 }
400
401 return ret;
402}
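/*
 * Example (path and device name are illustrative): writing the configfs
 * attribute, e.g.
 *
 *   echo 4 > /sys/kernel/config/nullb/nullb0/submit_queues
 *
 * ends up here via nullb_apply_submit_queues() and resizes the tag set with
 * blk_mq_update_nr_hw_queues(), keeping the poll queue count unchanged.
 */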
403
404static int nullb_apply_submit_queues(struct nullb_device *dev,
405 unsigned int submit_queues)
406{
407 return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
408}
409
410static int nullb_apply_poll_queues(struct nullb_device *dev,
411 unsigned int poll_queues)
412{
413 return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
45919fbf
BVA
414}
415
416NULLB_DEVICE_ATTR(size, ulong, NULL);
417NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
418NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
15dfc662 419NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues);
45919fbf
BVA
420NULLB_DEVICE_ATTR(home_node, uint, NULL);
421NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
422NULLB_DEVICE_ATTR(blocksize, uint, NULL);
ea17fd35 423NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
45919fbf
BVA
424NULLB_DEVICE_ATTR(irqmode, uint, NULL);
425NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
426NULLB_DEVICE_ATTR(index, uint, NULL);
427NULLB_DEVICE_ATTR(blocking, bool, NULL);
428NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
429NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
430NULLB_DEVICE_ATTR(discard, bool, NULL);
431NULLB_DEVICE_ATTR(mbps, uint, NULL);
432NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
433NULLB_DEVICE_ATTR(zoned, bool, NULL);
434NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
089565fb 435NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
45919fbf 436NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
dc4d137e
NC
437NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
438NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
cee1b215 439NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
7012eef5 440NULLB_DEVICE_ATTR(no_sched, bool, NULL);
14509b74 441NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
7012eef5 442NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
3bf2bd20 443
cedcafad
SL
444static ssize_t nullb_device_power_show(struct config_item *item, char *page)
445{
446 return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
447}
448
449static ssize_t nullb_device_power_store(struct config_item *item,
450 const char *page, size_t count)
451{
452 struct nullb_device *dev = to_nullb_device(item);
453 bool newp = false;
454 ssize_t ret;
455
456 ret = nullb_device_bool_attr_store(&newp, page, count);
457 if (ret < 0)
458 return ret;
459
460 if (!dev->power && newp) {
461 if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
462 return count;
a75110c3
CK
463 ret = null_add_dev(dev);
464 if (ret) {
cedcafad 465 clear_bit(NULLB_DEV_FL_UP, &dev->flags);
a75110c3 466 return ret;
cedcafad
SL
467 }
468
469 set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
470 dev->power = newp;
b3c30512 471 } else if (dev->power && !newp) {
7602843f
BL
472 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
473 mutex_lock(&lock);
474 dev->power = newp;
475 null_del_dev(dev->nullb);
476 mutex_unlock(&lock);
477 }
00a8cdb8 478 clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
cedcafad
SL
479 }
480
481 return count;
482}
483
484CONFIGFS_ATTR(nullb_device_, power);
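/*
 * Typical configfs usage (the device name "nullb0" is just an example):
 *
 *   mkdir /sys/kernel/config/nullb/nullb0
 *   echo 1 > /sys/kernel/config/nullb/nullb0/power    # calls null_add_dev()
 *   echo 0 > /sys/kernel/config/nullb/nullb0/power    # calls null_del_dev()
 */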
485
2f54a613
SL
486static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
487{
488 struct nullb_device *t_dev = to_nullb_device(item);
489
490 return badblocks_show(&t_dev->badblocks, page, 0);
491}
492
493static ssize_t nullb_device_badblocks_store(struct config_item *item,
494 const char *page, size_t count)
495{
496 struct nullb_device *t_dev = to_nullb_device(item);
497 char *orig, *buf, *tmp;
498 u64 start, end;
499 int ret;
500
501 orig = kstrndup(page, count, GFP_KERNEL);
502 if (!orig)
503 return -ENOMEM;
504
505 buf = strstrip(orig);
506
507 ret = -EINVAL;
508 if (buf[0] != '+' && buf[0] != '-')
509 goto out;
510 tmp = strchr(&buf[1], '-');
511 if (!tmp)
512 goto out;
513 *tmp = '\0';
514 ret = kstrtoull(buf + 1, 0, &start);
515 if (ret)
516 goto out;
517 ret = kstrtoull(tmp + 1, 0, &end);
518 if (ret)
519 goto out;
520 ret = -EINVAL;
521 if (start > end)
522 goto out;
523 /* enable badblocks */
524 cmpxchg(&t_dev->badblocks.shift, -1, 0);
525 if (buf[0] == '+')
526 ret = badblocks_set(&t_dev->badblocks, start,
527 end - start + 1, 1);
528 else
529 ret = badblocks_clear(&t_dev->badblocks, start,
530 end - start + 1);
531 if (ret == 0)
532 ret = count;
533out:
534 kfree(orig);
535 return ret;
536}
537CONFIGFS_ATTR(nullb_device_, badblocks);
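/*
 * The accepted badblocks format is "+<start>-<end>" to mark a sector range
 * bad and "-<start>-<end>" to clear it again, for example:
 *
 *   echo "+0-1023" > /sys/kernel/config/nullb/nullb0/badblocks
 */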
538
d3a57388
SK
539static ssize_t nullb_device_zone_readonly_store(struct config_item *item,
540 const char *page, size_t count)
541{
542 struct nullb_device *dev = to_nullb_device(item);
543
544 return zone_cond_store(dev, page, count, BLK_ZONE_COND_READONLY);
545}
546CONFIGFS_ATTR_WO(nullb_device_, zone_readonly);
547
548static ssize_t nullb_device_zone_offline_store(struct config_item *item,
549 const char *page, size_t count)
550{
551 struct nullb_device *dev = to_nullb_device(item);
552
553 return zone_cond_store(dev, page, count, BLK_ZONE_COND_OFFLINE);
554}
555CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
556
3bf2bd20
SL
557static struct configfs_attribute *nullb_device_attrs[] = {
558 &nullb_device_attr_size,
559 &nullb_device_attr_completion_nsec,
560 &nullb_device_attr_submit_queues,
0a593fbb 561 &nullb_device_attr_poll_queues,
3bf2bd20
SL
562 &nullb_device_attr_home_node,
563 &nullb_device_attr_queue_mode,
564 &nullb_device_attr_blocksize,
ea17fd35 565 &nullb_device_attr_max_sectors,
3bf2bd20
SL
566 &nullb_device_attr_irqmode,
567 &nullb_device_attr_hw_queue_depth,
cedcafad 568 &nullb_device_attr_index,
3bf2bd20
SL
569 &nullb_device_attr_blocking,
570 &nullb_device_attr_use_per_node_hctx,
cedcafad 571 &nullb_device_attr_power,
5bcd0e0c 572 &nullb_device_attr_memory_backed,
306eb6b4 573 &nullb_device_attr_discard,
eff2c4f1 574 &nullb_device_attr_mbps,
deb78b41 575 &nullb_device_attr_cache_size,
2f54a613 576 &nullb_device_attr_badblocks,
ca4b2a01
MB
577 &nullb_device_attr_zoned,
578 &nullb_device_attr_zone_size,
089565fb 579 &nullb_device_attr_zone_capacity,
ea2c18e1 580 &nullb_device_attr_zone_nr_conv,
dc4d137e
NC
581 &nullb_device_attr_zone_max_open,
582 &nullb_device_attr_zone_max_active,
d3a57388
SK
583 &nullb_device_attr_zone_readonly,
584 &nullb_device_attr_zone_offline,
cee1b215 585 &nullb_device_attr_virt_boundary,
7012eef5 586 &nullb_device_attr_no_sched,
14509b74 587 &nullb_device_attr_shared_tags,
7012eef5 588 &nullb_device_attr_shared_tag_bitmap,
3bf2bd20
SL
589 NULL,
590};
591
592static void nullb_device_release(struct config_item *item)
593{
5bcd0e0c
SL
594 struct nullb_device *dev = to_nullb_device(item);
595
deb78b41 596 null_free_device_storage(dev, false);
5bcd0e0c 597 null_free_dev(dev);
3bf2bd20
SL
598}
599
600static struct configfs_item_operations nullb_device_ops = {
601 .release = nullb_device_release,
602};
603
e1919dff 604static const struct config_item_type nullb_device_type = {
3bf2bd20
SL
605 .ct_item_ops = &nullb_device_ops,
606 .ct_attrs = nullb_device_attrs,
607 .ct_owner = THIS_MODULE,
608};
609
bb4c19e0
AM
610#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
611
612static void nullb_add_fault_config(struct nullb_device *dev)
613{
614 fault_config_init(&dev->timeout_config, "timeout_inject");
615 fault_config_init(&dev->requeue_config, "requeue_inject");
616 fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject");
617
618 configfs_add_default_group(&dev->timeout_config.group, &dev->group);
619 configfs_add_default_group(&dev->requeue_config.group, &dev->group);
620 configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group);
621}
622
623#else
624
625static void nullb_add_fault_config(struct nullb_device *dev)
626{
627}
628
629#endif
630
3bf2bd20 631static struct
bb4c19e0 632config_group *nullb_group_make_group(struct config_group *group, const char *name)
3bf2bd20
SL
633{
634 struct nullb_device *dev;
635
49c3b926
DLM
636 if (null_find_dev_by_name(name))
637 return ERR_PTR(-EEXIST);
638
3bf2bd20
SL
639 dev = null_alloc_dev();
640 if (!dev)
641 return ERR_PTR(-ENOMEM);
642
bb4c19e0
AM
643 config_group_init_type_name(&dev->group, name, &nullb_device_type);
644 nullb_add_fault_config(dev);
3bf2bd20 645
bb4c19e0 646 return &dev->group;
3bf2bd20
SL
647}
648
649static void
650nullb_group_drop_item(struct config_group *group, struct config_item *item)
651{
cedcafad
SL
652 struct nullb_device *dev = to_nullb_device(item);
653
654 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
655 mutex_lock(&lock);
656 dev->power = false;
657 null_del_dev(dev->nullb);
658 mutex_unlock(&lock);
659 }
660
3bf2bd20
SL
661 config_item_put(item);
662}
663
664static ssize_t memb_group_features_show(struct config_item *item, char *page)
665{
089565fb 666 return snprintf(page, PAGE_SIZE,
7012eef5
VF
667 "badblocks,blocking,blocksize,cache_size,"
668 "completion_nsec,discard,home_node,hw_queue_depth,"
669 "irqmode,max_sectors,mbps,memory_backed,no_sched,"
14509b74
SK
670 "poll_queues,power,queue_mode,shared_tag_bitmap,"
671 "shared_tags,size,submit_queues,use_per_node_hctx,"
672 "virt_boundary,zoned,zone_capacity,zone_max_active,"
673 "zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
674 "zone_size\n");
3bf2bd20
SL
675}
676
677CONFIGFS_ATTR_RO(memb_group_, features);
678
679static struct configfs_attribute *nullb_group_attrs[] = {
680 &memb_group_attr_features,
681 NULL,
682};
683
684static struct configfs_group_operations nullb_group_ops = {
bb4c19e0 685 .make_group = nullb_group_make_group,
3bf2bd20
SL
686 .drop_item = nullb_group_drop_item,
687};
688
e1919dff 689static const struct config_item_type nullb_group_type = {
3bf2bd20
SL
690 .ct_group_ops = &nullb_group_ops,
691 .ct_attrs = nullb_group_attrs,
692 .ct_owner = THIS_MODULE,
693};
694
695static struct configfs_subsystem nullb_subsys = {
696 .su_group = {
697 .cg_item = {
698 .ci_namebuf = "nullb",
699 .ci_type = &nullb_group_type,
700 },
701 },
702};
703
deb78b41
SL
704static inline int null_cache_active(struct nullb *nullb)
705{
706 return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
707}
708
2984c868
SL
709static struct nullb_device *null_alloc_dev(void)
710{
711 struct nullb_device *dev;
712
713 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
714 if (!dev)
715 return NULL;
bb4c19e0
AM
716
717#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
718 dev->timeout_config.attr = null_timeout_attr;
719 dev->requeue_config.attr = null_requeue_attr;
720 dev->init_hctx_fault_config.attr = null_init_hctx_attr;
721#endif
722
5bcd0e0c 723 INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
deb78b41 724 INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
2f54a613
SL
725 if (badblocks_init(&dev->badblocks, 0)) {
726 kfree(dev);
727 return NULL;
728 }
729
2984c868
SL
730 dev->size = g_gb * 1024;
731 dev->completion_nsec = g_completion_nsec;
732 dev->submit_queues = g_submit_queues;
15dfc662 733 dev->prev_submit_queues = g_submit_queues;
0a593fbb 734 dev->poll_queues = g_poll_queues;
15dfc662 735 dev->prev_poll_queues = g_poll_queues;
2984c868
SL
736 dev->home_node = g_home_node;
737 dev->queue_mode = g_queue_mode;
738 dev->blocksize = g_bs;
ea17fd35 739 dev->max_sectors = g_max_sectors;
2984c868
SL
740 dev->irqmode = g_irqmode;
741 dev->hw_queue_depth = g_hw_queue_depth;
2984c868 742 dev->blocking = g_blocking;
058efe00
VF
743 dev->memory_backed = g_memory_backed;
744 dev->discard = g_discard;
745 dev->cache_size = g_cache_size;
746 dev->mbps = g_mbps;
2984c868 747 dev->use_per_node_hctx = g_use_per_node_hctx;
ca4b2a01
MB
748 dev->zoned = g_zoned;
749 dev->zone_size = g_zone_size;
089565fb 750 dev->zone_capacity = g_zone_capacity;
ea2c18e1 751 dev->zone_nr_conv = g_zone_nr_conv;
dc4d137e
NC
752 dev->zone_max_open = g_zone_max_open;
753 dev->zone_max_active = g_zone_max_active;
cee1b215 754 dev->virt_boundary = g_virt_boundary;
7012eef5 755 dev->no_sched = g_no_sched;
14509b74 756 dev->shared_tags = g_shared_tags;
7012eef5 757 dev->shared_tag_bitmap = g_shared_tag_bitmap;
2984c868
SL
758 return dev;
759}
760
761static void null_free_dev(struct nullb_device *dev)
762{
1addb798
DD
763 if (!dev)
764 return;
765
d205bde7 766 null_free_zoned_dev(dev);
1addb798 767 badblocks_exit(&dev->badblocks);
2984c868
SL
768 kfree(dev);
769}
770
cf8ecc5a
AA
771static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
772{
8b631f9c 773 struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
f2298c04 774
8b631f9c 775 blk_mq_end_request(blk_mq_rq_from_pdu(cmd), cmd->error);
f2298c04
JA
776 return HRTIMER_NORESTART;
777}
778
779static void null_cmd_end_timer(struct nullb_cmd *cmd)
780{
2984c868 781 ktime_t kt = cmd->nq->dev->completion_nsec;
f2298c04 782
3c395a96 783 hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
f2298c04
JA
784}
785
49f66136 786static void null_complete_rq(struct request *rq)
f2298c04 787{
8b631f9c
CH
788 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
789
790 blk_mq_end_request(rq, cmd->error);
f2298c04
JA
791}
792
c90b6b50 793static struct nullb_page *null_alloc_page(void)
5bcd0e0c
SL
794{
795 struct nullb_page *t_page;
796
c90b6b50 797 t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
5bcd0e0c 798 if (!t_page)
df00b1d2 799 return NULL;
5bcd0e0c 800
c90b6b50 801 t_page->page = alloc_pages(GFP_NOIO, 0);
df00b1d2
CK
802 if (!t_page->page) {
803 kfree(t_page);
804 return NULL;
805 }
5bcd0e0c 806
66231ad3 807 memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
5bcd0e0c 808 return t_page;
5bcd0e0c
SL
809}
810
811static void null_free_page(struct nullb_page *t_page)
812{
66231ad3
ML
813 __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
814 if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
deb78b41 815 return;
5bcd0e0c
SL
816 __free_page(t_page->page);
817 kfree(t_page);
818}
819
66231ad3
ML
820static bool null_page_empty(struct nullb_page *page)
821{
822 int size = MAP_SZ - 2;
823
824 return find_first_bit(page->bitmap, size) == size;
825}
826
deb78b41
SL
827static void null_free_sector(struct nullb *nullb, sector_t sector,
828 bool is_cache)
5bcd0e0c
SL
829{
830 unsigned int sector_bit;
831 u64 idx;
832 struct nullb_page *t_page, *ret;
833 struct radix_tree_root *root;
834
deb78b41 835 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
5bcd0e0c
SL
836 idx = sector >> PAGE_SECTORS_SHIFT;
837 sector_bit = (sector & SECTOR_MASK);
838
839 t_page = radix_tree_lookup(root, idx);
840 if (t_page) {
66231ad3 841 __clear_bit(sector_bit, t_page->bitmap);
5bcd0e0c 842
66231ad3 843 if (null_page_empty(t_page)) {
5bcd0e0c
SL
844 ret = radix_tree_delete_item(root, idx, t_page);
845 WARN_ON(ret != t_page);
846 null_free_page(ret);
deb78b41
SL
847 if (is_cache)
848 nullb->dev->curr_cache -= PAGE_SIZE;
5bcd0e0c
SL
849 }
850 }
851}
852
853static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
deb78b41 854 struct nullb_page *t_page, bool is_cache)
5bcd0e0c
SL
855{
856 struct radix_tree_root *root;
857
deb78b41 858 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
5bcd0e0c
SL
859
860 if (radix_tree_insert(root, idx, t_page)) {
861 null_free_page(t_page);
862 t_page = radix_tree_lookup(root, idx);
863 WARN_ON(!t_page || t_page->page->index != idx);
deb78b41
SL
864 } else if (is_cache)
865 nullb->dev->curr_cache += PAGE_SIZE;
5bcd0e0c
SL
866
867 return t_page;
868}
869
deb78b41 870static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
5bcd0e0c
SL
871{
872 unsigned long pos = 0;
873 int nr_pages;
874 struct nullb_page *ret, *t_pages[FREE_BATCH];
875 struct radix_tree_root *root;
876
deb78b41 877 root = is_cache ? &dev->cache : &dev->data;
5bcd0e0c
SL
878
879 do {
880 int i;
881
882 nr_pages = radix_tree_gang_lookup(root,
883 (void **)t_pages, pos, FREE_BATCH);
884
885 for (i = 0; i < nr_pages; i++) {
886 pos = t_pages[i]->page->index;
887 ret = radix_tree_delete_item(root, pos, t_pages[i]);
888 WARN_ON(ret != t_pages[i]);
889 null_free_page(ret);
890 }
891
892 pos++;
893 } while (nr_pages == FREE_BATCH);
deb78b41
SL
894
895 if (is_cache)
896 dev->curr_cache = 0;
5bcd0e0c
SL
897}
898
deb78b41
SL
899static struct nullb_page *__null_lookup_page(struct nullb *nullb,
900 sector_t sector, bool for_write, bool is_cache)
5bcd0e0c
SL
901{
902 unsigned int sector_bit;
903 u64 idx;
904 struct nullb_page *t_page;
deb78b41 905 struct radix_tree_root *root;
5bcd0e0c
SL
906
907 idx = sector >> PAGE_SECTORS_SHIFT;
908 sector_bit = (sector & SECTOR_MASK);
909
deb78b41
SL
910 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
911 t_page = radix_tree_lookup(root, idx);
5bcd0e0c
SL
912 WARN_ON(t_page && t_page->page->index != idx);
913
66231ad3 914 if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
5bcd0e0c
SL
915 return t_page;
916
917 return NULL;
918}
919
deb78b41
SL
920static struct nullb_page *null_lookup_page(struct nullb *nullb,
921 sector_t sector, bool for_write, bool ignore_cache)
922{
923 struct nullb_page *page = NULL;
924
925 if (!ignore_cache)
926 page = __null_lookup_page(nullb, sector, for_write, true);
927 if (page)
928 return page;
929 return __null_lookup_page(nullb, sector, for_write, false);
930}
931
5bcd0e0c 932static struct nullb_page *null_insert_page(struct nullb *nullb,
61884de0
JA
933 sector_t sector, bool ignore_cache)
934 __releases(&nullb->lock)
935 __acquires(&nullb->lock)
5bcd0e0c
SL
936{
937 u64 idx;
938 struct nullb_page *t_page;
939
deb78b41 940 t_page = null_lookup_page(nullb, sector, true, ignore_cache);
5bcd0e0c
SL
941 if (t_page)
942 return t_page;
943
944 spin_unlock_irq(&nullb->lock);
945
c90b6b50 946 t_page = null_alloc_page();
5bcd0e0c
SL
947 if (!t_page)
948 goto out_lock;
949
950 if (radix_tree_preload(GFP_NOIO))
951 goto out_freepage;
952
953 spin_lock_irq(&nullb->lock);
954 idx = sector >> PAGE_SECTORS_SHIFT;
955 t_page->page->index = idx;
deb78b41 956 t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
5bcd0e0c
SL
957 radix_tree_preload_end();
958
959 return t_page;
960out_freepage:
961 null_free_page(t_page);
962out_lock:
963 spin_lock_irq(&nullb->lock);
deb78b41
SL
964 return null_lookup_page(nullb, sector, true, ignore_cache);
965}
966
967static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
968{
969 int i;
970 unsigned int offset;
971 u64 idx;
972 struct nullb_page *t_page, *ret;
973 void *dst, *src;
974
975 idx = c_page->page->index;
976
977 t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
978
66231ad3
ML
979 __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
980 if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
deb78b41 981 null_free_page(c_page);
66231ad3 982 if (t_page && null_page_empty(t_page)) {
deb78b41
SL
983 ret = radix_tree_delete_item(&nullb->dev->data,
984 idx, t_page);
985 null_free_page(t_page);
986 }
987 return 0;
988 }
989
990 if (!t_page)
991 return -ENOMEM;
992
acc3c879
CK
993 src = kmap_local_page(c_page->page);
994 dst = kmap_local_page(t_page->page);
deb78b41
SL
995
996 for (i = 0; i < PAGE_SECTORS;
997 i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
66231ad3 998 if (test_bit(i, c_page->bitmap)) {
deb78b41
SL
999 offset = (i << SECTOR_SHIFT);
1000 memcpy(dst + offset, src + offset,
1001 nullb->dev->blocksize);
66231ad3 1002 __set_bit(i, t_page->bitmap);
deb78b41
SL
1003 }
1004 }
1005
acc3c879
CK
1006 kunmap_local(dst);
1007 kunmap_local(src);
deb78b41
SL
1008
1009 ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
1010 null_free_page(ret);
1011 nullb->dev->curr_cache -= PAGE_SIZE;
1012
1013 return 0;
1014}
1015
1016static int null_make_cache_space(struct nullb *nullb, unsigned long n)
f2298c04 1017{
deb78b41
SL
1018 int i, err, nr_pages;
1019 struct nullb_page *c_pages[FREE_BATCH];
1020 unsigned long flushed = 0, one_round;
1021
1022again:
1023 if ((nullb->dev->cache_size * 1024 * 1024) >
1024 nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
1025 return 0;
1026
1027 nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
1028 (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
1029 /*
 1030 * null_flush_cache_page() could unlock before using the c_pages. To
 1031 * avoid the race, we don't allow the pages to be freed.
1032 */
1033 for (i = 0; i < nr_pages; i++) {
1034 nullb->cache_flush_pos = c_pages[i]->page->index;
1035 /*
1036 * We found the page which is being flushed to disk by other
1037 * threads
1038 */
66231ad3 1039 if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
deb78b41
SL
1040 c_pages[i] = NULL;
1041 else
66231ad3 1042 __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
deb78b41
SL
1043 }
1044
1045 one_round = 0;
1046 for (i = 0; i < nr_pages; i++) {
1047 if (c_pages[i] == NULL)
1048 continue;
1049 err = null_flush_cache_page(nullb, c_pages[i]);
1050 if (err)
1051 return err;
1052 one_round++;
1053 }
1054 flushed += one_round << PAGE_SHIFT;
1055
1056 if (n > flushed) {
1057 if (nr_pages == 0)
1058 nullb->cache_flush_pos = 0;
1059 if (one_round == 0) {
1060 /* give other threads a chance */
1061 spin_unlock_irq(&nullb->lock);
1062 spin_lock_irq(&nullb->lock);
1063 }
1064 goto again;
1065 }
1066 return 0;
5bcd0e0c
SL
1067}
1068
1069static int copy_to_nullb(struct nullb *nullb, struct page *source,
deb78b41 1070 unsigned int off, sector_t sector, size_t n, bool is_fua)
5bcd0e0c
SL
1071{
1072 size_t temp, count = 0;
1073 unsigned int offset;
1074 struct nullb_page *t_page;
5bcd0e0c
SL
1075
1076 while (count < n) {
1077 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1078
deb78b41
SL
1079 if (null_cache_active(nullb) && !is_fua)
1080 null_make_cache_space(nullb, PAGE_SIZE);
1081
5bcd0e0c 1082 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
deb78b41
SL
1083 t_page = null_insert_page(nullb, sector,
1084 !null_cache_active(nullb) || is_fua);
5bcd0e0c
SL
1085 if (!t_page)
1086 return -ENOSPC;
1087
fbb5615f 1088 memcpy_page(t_page->page, offset, source, off + count, temp);
5bcd0e0c 1089
66231ad3 1090 __set_bit(sector & SECTOR_MASK, t_page->bitmap);
5bcd0e0c 1091
deb78b41
SL
1092 if (is_fua)
1093 null_free_sector(nullb, sector, true);
1094
5bcd0e0c
SL
1095 count += temp;
1096 sector += temp >> SECTOR_SHIFT;
1097 }
1098 return 0;
1099}
1100
1101static int copy_from_nullb(struct nullb *nullb, struct page *dest,
1102 unsigned int off, sector_t sector, size_t n)
1103{
1104 size_t temp, count = 0;
1105 unsigned int offset;
1106 struct nullb_page *t_page;
5bcd0e0c
SL
1107
1108 while (count < n) {
1109 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1110
1111 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
deb78b41
SL
1112 t_page = null_lookup_page(nullb, sector, false,
1113 !null_cache_active(nullb));
5bcd0e0c 1114
fbb5615f
CK
1115 if (t_page)
1116 memcpy_page(dest, off + count, t_page->page, offset,
1117 temp);
1118 else
1119 zero_user(dest, off + count, temp);
5bcd0e0c
SL
1120
1121 count += temp;
1122 sector += temp >> SECTOR_SHIFT;
1123 }
1124 return 0;
1125}
1126
dd85b492
AJ
1127static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
1128 unsigned int len, unsigned int off)
1129{
fbb5615f 1130 memset_page(page, off, 0xff, len);
dd85b492
AJ
1131}
1132
0ec4d913
DLM
1133blk_status_t null_handle_discard(struct nullb_device *dev,
1134 sector_t sector, sector_t nr_sectors)
306eb6b4 1135{
49c7089f
DLM
1136 struct nullb *nullb = dev->nullb;
1137 size_t n = nr_sectors << SECTOR_SHIFT;
306eb6b4
SL
1138 size_t temp;
1139
1140 spin_lock_irq(&nullb->lock);
1141 while (n > 0) {
49c7089f 1142 temp = min_t(size_t, n, dev->blocksize);
deb78b41
SL
1143 null_free_sector(nullb, sector, false);
1144 if (null_cache_active(nullb))
1145 null_free_sector(nullb, sector, true);
306eb6b4
SL
1146 sector += temp >> SECTOR_SHIFT;
1147 n -= temp;
1148 }
1149 spin_unlock_irq(&nullb->lock);
49c7089f
DLM
1150
1151 return BLK_STS_OK;
306eb6b4
SL
1152}
1153
deb78b41
SL
1154static int null_handle_flush(struct nullb *nullb)
1155{
1156 int err;
1157
1158 if (!null_cache_active(nullb))
1159 return 0;
1160
1161 spin_lock_irq(&nullb->lock);
1162 while (true) {
1163 err = null_make_cache_space(nullb,
1164 nullb->dev->cache_size * 1024 * 1024);
1165 if (err || nullb->dev->curr_cache == 0)
1166 break;
1167 }
1168
1169 WARN_ON(!radix_tree_empty(&nullb->dev->cache));
1170 spin_unlock_irq(&nullb->lock);
1171 return err;
1172}
1173
5bcd0e0c 1174static int null_transfer(struct nullb *nullb, struct page *page,
deb78b41
SL
1175 unsigned int len, unsigned int off, bool is_write, sector_t sector,
1176 bool is_fua)
f2298c04 1177{
dd85b492
AJ
1178 struct nullb_device *dev = nullb->dev;
1179 unsigned int valid_len = len;
5bcd0e0c
SL
1180 int err = 0;
1181
1182 if (!is_write) {
dd85b492
AJ
1183 if (dev->zoned)
1184 valid_len = null_zone_valid_read_len(nullb,
1185 sector, len);
1186
1187 if (valid_len) {
1188 err = copy_from_nullb(nullb, page, off,
1189 sector, valid_len);
1190 off += valid_len;
1191 len -= valid_len;
1192 }
1193
1194 if (len)
1195 nullb_fill_pattern(nullb, page, len, off);
5bcd0e0c
SL
1196 flush_dcache_page(page);
1197 } else {
1198 flush_dcache_page(page);
deb78b41 1199 err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
5bcd0e0c
SL
1200 }
1201
1202 return err;
1203}
1204
1205static int null_handle_rq(struct nullb_cmd *cmd)
1206{
8b631f9c 1207 struct request *rq = blk_mq_rq_from_pdu(cmd);
5bcd0e0c
SL
1208 struct nullb *nullb = cmd->nq->dev->nullb;
1209 int err;
1210 unsigned int len;
49c7089f 1211 sector_t sector = blk_rq_pos(rq);
5bcd0e0c
SL
1212 struct req_iterator iter;
1213 struct bio_vec bvec;
1214
5bcd0e0c
SL
1215 spin_lock_irq(&nullb->lock);
1216 rq_for_each_segment(bvec, rq, iter) {
1217 len = bvec.bv_len;
1218 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
deb78b41 1219 op_is_write(req_op(rq)), sector,
2d62e6b0 1220 rq->cmd_flags & REQ_FUA);
5bcd0e0c
SL
1221 if (err) {
1222 spin_unlock_irq(&nullb->lock);
1223 return err;
1224 }
1225 sector += len >> SECTOR_SHIFT;
1226 }
1227 spin_unlock_irq(&nullb->lock);
1228
1229 return 0;
1230}
1231
adb84284
CK
1232static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
1233{
1234 struct nullb_device *dev = cmd->nq->dev;
1235 struct nullb *nullb = dev->nullb;
1236 blk_status_t sts = BLK_STS_OK;
8b631f9c 1237 struct request *rq = blk_mq_rq_from_pdu(cmd);
adb84284
CK
1238
1239 if (!hrtimer_active(&nullb->bw_timer))
1240 hrtimer_restart(&nullb->bw_timer);
1241
1242 if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
8b631f9c 1243 blk_mq_stop_hw_queues(nullb->q);
adb84284
CK
1244 /* race with timer */
1245 if (atomic_long_read(&nullb->cur_bytes) > 0)
8b631f9c 1246 blk_mq_start_stopped_hw_queues(nullb->q, true);
adb84284
CK
1247 /* requeue request */
1248 sts = BLK_STS_DEV_RESOURCE;
1249 }
1250 return sts;
1251}
1252
8f94d1c1
CK
1253static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
1254 sector_t sector,
1255 sector_t nr_sectors)
1256{
1257 struct badblocks *bb = &cmd->nq->dev->badblocks;
1258 sector_t first_bad;
1259 int bad_sectors;
1260
1261 if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
1262 return BLK_STS_IOERR;
1263
1264 return BLK_STS_OK;
1265}
1266
7ea88e22 1267static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
ff07a02e 1268 enum req_op op,
49c7089f
DLM
1269 sector_t sector,
1270 sector_t nr_sectors)
7ea88e22
CK
1271{
1272 struct nullb_device *dev = cmd->nq->dev;
7ea88e22 1273
49c7089f
DLM
1274 if (op == REQ_OP_DISCARD)
1275 return null_handle_discard(dev, sector, nr_sectors);
8b631f9c 1276 return errno_to_blk_status(null_handle_rq(cmd));
49c7089f 1277
7ea88e22
CK
1278}
1279
cecbc9ce
BVA
1280static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
1281{
8b631f9c 1282 struct request *rq = blk_mq_rq_from_pdu(cmd);
cecbc9ce
BVA
1283 struct nullb_device *dev = cmd->nq->dev;
1284 struct bio *bio;
1285
8b631f9c
CH
1286 if (!dev->memory_backed && req_op(rq) == REQ_OP_READ) {
1287 __rq_for_each_bio(bio, rq)
cecbc9ce
BVA
1288 zero_fill_bio(bio);
1289 }
1290}
1291
a3d7d674
CK
1292static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
1293{
8b631f9c
CH
1294 struct request *rq = blk_mq_rq_from_pdu(cmd);
1295
cecbc9ce
BVA
1296 /*
1297 * Since root privileges are required to configure the null_blk
1298 * driver, it is fine that this driver does not initialize the
1299 * data buffers of read commands. Zero-initialize these buffers
 1300 * anyway if KMSAN is enabled, to prevent KMSAN from complaining
1301 * about null_blk not initializing read data buffers.
1302 */
1303 if (IS_ENABLED(CONFIG_KMSAN))
1304 nullb_zero_read_cmd_buffer(cmd);
1305
a3d7d674
CK
1306 /* Complete IO by inline, softirq or timer */
1307 switch (cmd->nq->dev->irqmode) {
1308 case NULL_IRQ_SOFTIRQ:
8b631f9c 1309 blk_mq_complete_request(rq);
a3d7d674
CK
1310 break;
1311 case NULL_IRQ_NONE:
8b631f9c 1312 blk_mq_end_request(rq, cmd->error);
a3d7d674
CK
1313 break;
1314 case NULL_IRQ_TIMER:
1315 null_cmd_end_timer(cmd);
1316 break;
1317 }
1318}
1319
ff07a02e
BVA
1320blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
1321 sector_t sector, unsigned int nr_sectors)
9dd44c7e
DLM
1322{
1323 struct nullb_device *dev = cmd->nq->dev;
1324 blk_status_t ret;
1325
1326 if (dev->badblocks.shift != -1) {
1327 ret = null_handle_badblocks(cmd, sector, nr_sectors);
1328 if (ret != BLK_STS_OK)
1329 return ret;
1330 }
1331
1332 if (dev->memory_backed)
49c7089f 1333 return null_handle_memory_backed(cmd, op, sector, nr_sectors);
9dd44c7e
DLM
1334
1335 return BLK_STS_OK;
1336}
1337
53f2bca2
CZ
1338static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
1339 sector_t nr_sectors, enum req_op op)
5bcd0e0c
SL
1340{
1341 struct nullb_device *dev = cmd->nq->dev;
eff2c4f1 1342 struct nullb *nullb = dev->nullb;
adb84284 1343 blk_status_t sts;
5bcd0e0c 1344
d4b186ed
CK
1345 if (op == REQ_OP_FLUSH) {
1346 cmd->error = errno_to_blk_status(null_handle_flush(nullb));
1347 goto out;
1348 }
d4b186ed 1349
9dd44c7e 1350 if (dev->zoned)
de3510e5 1351 sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
9dd44c7e 1352 else
de3510e5
DLM
1353 sts = null_process_cmd(cmd, op, sector, nr_sectors);
1354
1355 /* Do not overwrite errors (e.g. timeout errors) */
1356 if (cmd->error == BLK_STS_OK)
1357 cmd->error = sts;
fceb5d1b 1358
2f54a613 1359out:
a3d7d674 1360 nullb_complete_cmd(cmd);
f2298c04
JA
1361}
1362
eff2c4f1
SL
1363static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
1364{
1365 struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
1366 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1367 unsigned int mbps = nullb->dev->mbps;
1368
1369 if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
1370 return HRTIMER_NORESTART;
1371
1372 atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
8b631f9c 1373 blk_mq_start_stopped_hw_queues(nullb->q, true);
eff2c4f1
SL
1374
1375 hrtimer_forward_now(&nullb->bw_timer, timer_interval);
1376
1377 return HRTIMER_RESTART;
1378}
1379
1380static void nullb_setup_bwtimer(struct nullb *nullb)
1381{
1382 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1383
1384 hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1385 nullb->bw_timer.function = nullb_bwtimer_fn;
1386 atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
1387 hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
f2298c04
JA
1388}
1389
bb4c19e0
AM
1390#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1391
1392static bool should_timeout_request(struct request *rq)
1393{
1394 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1395 struct nullb_device *dev = cmd->nq->dev;
1396
1397 return should_fail(&dev->timeout_config.attr, 1);
1398}
1399
1400static bool should_requeue_request(struct request *rq)
1401{
1402 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1403 struct nullb_device *dev = cmd->nq->dev;
1404
1405 return should_fail(&dev->requeue_config.attr, 1);
1406}
1407
1408static bool should_init_hctx_fail(struct nullb_device *dev)
1409{
1410 return should_fail(&dev->init_hctx_fault_config.attr, 1);
1411}
1412
1413#else
1414
93b57046
JA
1415static bool should_timeout_request(struct request *rq)
1416{
24941b90
JA
1417 return false;
1418}
93b57046 1419
24941b90
JA
1420static bool should_requeue_request(struct request *rq)
1421{
93b57046
JA
1422 return false;
1423}
1424
bb4c19e0
AM
1425static bool should_init_hctx_fail(struct nullb_device *dev)
1426{
1427 return false;
1428}
1429
1430#endif
1431
a4e1d0b7 1432static void null_map_queues(struct blk_mq_tag_set *set)
0a593fbb
JA
1433{
1434 struct nullb *nullb = set->driver_data;
1435 int i, qoff;
15dfc662
SK
1436 unsigned int submit_queues = g_submit_queues;
1437 unsigned int poll_queues = g_poll_queues;
1438
1439 if (nullb) {
1440 struct nullb_device *dev = nullb->dev;
1441
1442 /*
 1443 * Check nr_hw_queues of the tag set to see whether the expected
 1444 * number of hardware queues was prepared. If the block layer failed
 1445 * to prepare them, use the previous numbers of submit queues and
1446 * poll queues to map queues.
1447 */
1448 if (set->nr_hw_queues ==
1449 dev->submit_queues + dev->poll_queues) {
1450 submit_queues = dev->submit_queues;
1451 poll_queues = dev->poll_queues;
1452 } else if (set->nr_hw_queues ==
1453 dev->prev_submit_queues + dev->prev_poll_queues) {
1454 submit_queues = dev->prev_submit_queues;
1455 poll_queues = dev->prev_poll_queues;
1456 } else {
1457 pr_warn("tag set has unexpected nr_hw_queues: %d\n",
1458 set->nr_hw_queues);
10b41ea1
BVA
1459 WARN_ON_ONCE(true);
1460 submit_queues = 1;
1461 poll_queues = 0;
15dfc662
SK
1462 }
1463 }
0a593fbb
JA
1464
1465 for (i = 0, qoff = 0; i < set->nr_maps; i++) {
1466 struct blk_mq_queue_map *map = &set->map[i];
1467
1468 switch (i) {
1469 case HCTX_TYPE_DEFAULT:
15dfc662 1470 map->nr_queues = submit_queues;
0a593fbb
JA
1471 break;
1472 case HCTX_TYPE_READ:
1473 map->nr_queues = 0;
1474 continue;
1475 case HCTX_TYPE_POLL:
15dfc662 1476 map->nr_queues = poll_queues;
0a593fbb
JA
1477 break;
1478 }
1479 map->queue_offset = qoff;
1480 qoff += map->nr_queues;
1481 blk_mq_map_queues(map);
1482 }
0a593fbb
JA
1483}
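/*
 * Illustrative mapping: with submit_queues = 4 and poll_queues = 2 the tag
 * set has 6 hardware queues; HCTX_TYPE_DEFAULT covers queues 0-3,
 * HCTX_TYPE_READ gets none, and HCTX_TYPE_POLL covers queues 4-5.
 */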
1484
1485static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
1486{
1487 struct nullb_queue *nq = hctx->driver_data;
1488 LIST_HEAD(list);
1489 int nr = 0;
5a26e45e 1490 struct request *rq;
0a593fbb
JA
1491
1492 spin_lock(&nq->poll_lock);
1493 list_splice_init(&nq->poll_list, &list);
5a26e45e
CZ
1494 list_for_each_entry(rq, &list, queuelist)
1495 blk_mq_set_request_complete(rq);
0a593fbb
JA
1496 spin_unlock(&nq->poll_lock);
1497
1498 while (!list_empty(&list)) {
1499 struct nullb_cmd *cmd;
1500 struct request *req;
1501
1502 req = list_first_entry(&list, struct request, queuelist);
1503 list_del_init(&req->queuelist);
1504 cmd = blk_mq_rq_to_pdu(req);
1505 cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
1506 blk_rq_sectors(req));
c5eafd79 1507 if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
2385ebf3 1508 blk_mq_end_request_batch))
8b631f9c 1509 blk_mq_end_request(req, cmd->error);
0a593fbb
JA
1510 nr++;
1511 }
1512
1513 return nr;
1514}
1515
9bdb4833 1516static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
5448aca4 1517{
0a593fbb 1518 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
de3510e5
DLM
1519 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1520
0a593fbb
JA
1521 if (hctx->type == HCTX_TYPE_POLL) {
1522 struct nullb_queue *nq = hctx->driver_data;
1523
1524 spin_lock(&nq->poll_lock);
5a26e45e
CZ
1525 /* The request may have completed meanwhile. */
1526 if (blk_mq_request_completed(rq)) {
1527 spin_unlock(&nq->poll_lock);
1528 return BLK_EH_DONE;
1529 }
0a593fbb
JA
1530 list_del_init(&rq->queuelist);
1531 spin_unlock(&nq->poll_lock);
1532 }
1533
5a26e45e
CZ
1534 pr_info("rq %p timed out\n", rq);
1535
de3510e5
DLM
1536 /*
1537 * If the device is marked as blocking (i.e. memory backed or zoned
1538 * device), the submission path may be blocked waiting for resources
1539 * and cause real timeouts. For these real timeouts, the submission
1540 * path will complete the request using blk_mq_complete_request().
1541 * Only fake timeouts need to execute blk_mq_complete_request() here.
1542 */
1543 cmd->error = BLK_STS_TIMEOUT;
3e3876d3 1544 if (cmd->fake_timeout || hctx->type == HCTX_TYPE_POLL)
de3510e5 1545 blk_mq_complete_request(rq);
0df0bb08 1546 return BLK_EH_DONE;
5448aca4
JA
1547}
1548
fc17b653 1549static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
b6402014 1550 const struct blk_mq_queue_data *bd)
f2298c04 1551{
b6402014
DLM
1552 struct request *rq = bd->rq;
1553 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
2984c868 1554 struct nullb_queue *nq = hctx->driver_data;
b6402014
DLM
1555 sector_t nr_sectors = blk_rq_sectors(rq);
1556 sector_t sector = blk_rq_pos(rq);
0a593fbb 1557 const bool is_poll = hctx->type == HCTX_TYPE_POLL;
f2298c04 1558
db5bcf87
JA
1559 might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
1560
0a593fbb 1561 if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
3c395a96
PV
1562 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1563 cmd->timer.function = null_cmd_timer_expired;
1564 }
ff770422 1565 cmd->error = BLK_STS_OK;
2984c868 1566 cmd->nq = nq;
b6402014
DLM
1567 cmd->fake_timeout = should_timeout_request(rq) ||
1568 blk_should_fake_timeout(rq->q);
f2298c04 1569
b6402014 1570 if (should_requeue_request(rq)) {
24941b90
JA
1571 /*
 1572 * Alternate between hitting the core BUSY path and the
 1573 * driver-driven requeue path.
1574 */
1575 nq->requeue_selection++;
1576 if (nq->requeue_selection & 1)
1577 return BLK_STS_RESOURCE;
b6402014
DLM
1578 blk_mq_requeue_request(rq, true);
1579 return BLK_STS_OK;
24941b90 1580 }
0a593fbb 1581
53f2bca2
CZ
1582 if (test_bit(NULLB_DEV_FL_THROTTLED, &nq->dev->flags)) {
1583 blk_status_t sts = null_handle_throttled(cmd);
1584
1585 if (sts != BLK_STS_OK)
1586 return sts;
1587 }
1588
1589 blk_mq_start_request(rq);
1590
0a593fbb
JA
1591 if (is_poll) {
1592 spin_lock(&nq->poll_lock);
b6402014 1593 list_add_tail(&rq->queuelist, &nq->poll_list);
0a593fbb
JA
1594 spin_unlock(&nq->poll_lock);
1595 return BLK_STS_OK;
1596 }
de3510e5 1597 if (cmd->fake_timeout)
24941b90 1598 return BLK_STS_OK;
93b57046 1599
53f2bca2
CZ
1600 null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
1601 return BLK_STS_OK;
f2298c04
JA
1602}
1603
d78bfa13
CZ
1604static void null_queue_rqs(struct request **rqlist)
1605{
1606 struct request *requeue_list = NULL;
1607 struct request **requeue_lastp = &requeue_list;
1608 struct blk_mq_queue_data bd = { };
1609 blk_status_t ret;
1610
1611 do {
1612 struct request *rq = rq_list_pop(rqlist);
1613
1614 bd.rq = rq;
1615 ret = null_queue_rq(rq->mq_hctx, &bd);
1616 if (ret != BLK_STS_OK)
1617 rq_list_add_tail(&requeue_lastp, rq);
1618 } while (!rq_list_empty(*rqlist));
1619
1620 *rqlist = requeue_list;
1621}
1622
78b10be2
BVA
1623static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
1624{
1625 struct nullb_queue *nq = hctx->driver_data;
1626 struct nullb *nullb = nq->dev->nullb;
1627
1628 nullb->nr_queues--;
1629}
1630
1631static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
1632{
78b10be2 1633 nq->dev = nullb->dev;
0a593fbb
JA
1634 INIT_LIST_HEAD(&nq->poll_list);
1635 spin_lock_init(&nq->poll_lock);
78b10be2
BVA
1636}
1637
1638static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
1639 unsigned int hctx_idx)
1640{
1641 struct nullb *nullb = hctx->queue->queuedata;
1642 struct nullb_queue *nq;
1643
bb4c19e0 1644 if (should_init_hctx_fail(nullb->dev))
596444e7 1645 return -EFAULT;
596444e7 1646
78b10be2
BVA
1647 nq = &nullb->queues[hctx_idx];
1648 hctx->driver_data = nq;
1649 null_init_queue(nullb, nq);
1650 nullb->nr_queues++;
1651
1652 return 0;
1653}
1654
1655static const struct blk_mq_ops null_mq_ops = {
1656 .queue_rq = null_queue_rq,
d78bfa13 1657 .queue_rqs = null_queue_rqs,
78b10be2
BVA
1658 .complete = null_complete_rq,
1659 .timeout = null_timeout_rq,
0a593fbb
JA
1660 .poll = null_poll,
1661 .map_queues = null_map_queues,
78b10be2
BVA
1662 .init_hctx = null_init_hctx,
1663 .exit_hctx = null_exit_hctx,
1664};
1665
9ae2d0aa
MB
1666static void null_del_dev(struct nullb *nullb)
1667{
9b03b713
BVA
1668 struct nullb_device *dev;
1669
1670 if (!nullb)
1671 return;
1672
1673 dev = nullb->dev;
2984c868 1674
95931a24 1675 ida_free(&nullb_indexes, nullb->index);
94bc02e3 1676
9ae2d0aa
MB
1677 list_del_init(&nullb->list);
1678
74ede5af 1679 del_gendisk(nullb->disk);
eff2c4f1
SL
1680
1681 if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
1682 hrtimer_cancel(&nullb->bw_timer);
1683 atomic_long_set(&nullb->cur_bytes, LONG_MAX);
8b631f9c 1684 blk_mq_start_stopped_hw_queues(nullb->q, true);
eff2c4f1
SL
1685 }
1686
8b9ab626 1687 put_disk(nullb->disk);
8b631f9c 1688 if (nullb->tag_set == &nullb->__tag_set)
82f402fe 1689 blk_mq_free_tag_set(nullb->tag_set);
8b631f9c 1690 kfree(nullb->queues);
deb78b41
SL
1691 if (null_cache_active(nullb))
1692 null_free_device_storage(nullb->dev, true);
9ae2d0aa 1693 kfree(nullb);
2984c868 1694 dev->nullb = NULL;
9ae2d0aa
MB
1695}
1696
306eb6b4
SL
1697static void null_config_discard(struct nullb *nullb)
1698{
1699 if (nullb->dev->discard == false)
1700 return;
1592cd15 1701
49c7089f
DLM
1702 if (!nullb->dev->memory_backed) {
1703 nullb->dev->discard = false;
1704 pr_info("discard option is ignored without memory backing\n");
1705 return;
1706 }
1707
1592cd15
CK
1708 if (nullb->dev->zoned) {
1709 nullb->dev->discard = false;
1710 pr_info("discard option is ignored in zoned mode\n");
1711 return;
1712 }
1713
306eb6b4 1714 blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
9ae2d0aa
MB
1715}
1716
8b631f9c 1717static const struct block_device_operations null_ops = {
c62b37d9 1718 .owner = THIS_MODULE,
c62b37d9
CH
1719 .report_zones = null_report_zones,
1720};
1721
f2298c04
JA
static int setup_queues(struct nullb *nullb)
{
	int nqueues = nr_cpu_ids;

	if (g_poll_queues)
		nqueues += g_poll_queues;

	nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue),
				GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->queue_depth = nullb->dev->hw_queue_depth;
	return 0;
}

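/*
 * Register the gendisk; dev->size is in MiB, while the capacity is set in
 * 512-byte sectors.
 */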
static int null_gendisk_register(struct nullb *nullb)
{
	sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
	struct gendisk *disk = nullb->disk;

	set_capacity(disk, size);

	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->minors		= 1;
	disk->fops		= &null_ops;
	disk->private_data	= nullb;
	strscpy_pad(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);

	if (nullb->dev->zoned) {
		int ret = null_register_zoned_dev(nullb);

		if (ret)
			return ret;
	}

	return add_disk(disk);
}

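/*
 * Common tag_set initialization for both the shared and the per-device
 * case. When poll queues are requested, extra hardware queues are added
 * and two more queue maps are reserved so that polled I/O gets its own map.
 */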
static int null_init_tag_set(struct blk_mq_tag_set *set, int poll_queues)
{
	set->ops = &null_mq_ops;
	set->cmd_size = sizeof(struct nullb_cmd);
	set->timeout = 5 * HZ;
	set->nr_maps = 1;
	if (poll_queues) {
		set->nr_hw_queues += poll_queues;
		set->nr_maps += 2;
	}
	return blk_mq_alloc_tag_set(set);
}

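/*
 * Lazily initialize the module-wide tag_set used when shared_tags is set;
 * it is built from the module parameters and reused by every device that
 * opts in.
 */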
static int null_init_global_tag_set(void)
{
	int error;

	if (tag_set.ops)
		return 0;

	tag_set.nr_hw_queues = g_submit_queues;
	tag_set.queue_depth = g_hw_queue_depth;
	tag_set.numa_node = g_home_node;
	tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	if (g_no_sched)
		tag_set.flags |= BLK_MQ_F_NO_SCHED;
	if (g_shared_tag_bitmap)
		tag_set.flags |= BLK_MQ_F_TAG_HCTX_SHARED;
	if (g_blocking)
		tag_set.flags |= BLK_MQ_F_BLOCKING;

	error = null_init_tag_set(&tag_set, g_poll_queues);
	if (error)
		tag_set.ops = NULL;
	return error;
}

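/*
 * Pick the tag_set for a device: the global, shared one when shared_tags
 * is enabled, otherwise a per-device set built from the device's own
 * configuration.
 */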
static int null_setup_tagset(struct nullb *nullb)
{
	if (nullb->dev->shared_tags) {
		nullb->tag_set = &tag_set;
		return null_init_global_tag_set();
	}

	nullb->tag_set = &nullb->__tag_set;
	nullb->tag_set->driver_data = nullb;
	nullb->tag_set->nr_hw_queues = nullb->dev->submit_queues;
	nullb->tag_set->queue_depth = nullb->dev->hw_queue_depth;
	nullb->tag_set->numa_node = nullb->dev->home_node;
	nullb->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
	if (nullb->dev->no_sched)
		nullb->tag_set->flags |= BLK_MQ_F_NO_SCHED;
	if (nullb->dev->shared_tag_bitmap)
		nullb->tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
	if (nullb->dev->blocking)
		nullb->tag_set->flags |= BLK_MQ_F_BLOCKING;
	return null_init_tag_set(nullb->tag_set, nullb->dev->poll_queues);
}

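/*
 * Sanity-check and clamp a device configuration before it is used: reject
 * the removed legacy queue modes, keep the block size between 512 and 4096
 * bytes, and bound the queue counts, cache size and bandwidth limit to
 * supported ranges.
 */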
static int null_validate_conf(struct nullb_device *dev)
{
	if (dev->queue_mode == NULL_Q_RQ) {
		pr_err("legacy IO path is no longer available\n");
		return -EINVAL;
	}
	if (dev->queue_mode == NULL_Q_BIO) {
		pr_err("BIO-based IO path is no longer available, using blk-mq instead.\n");
		dev->queue_mode = NULL_Q_MQ;
	}

	dev->blocksize = round_down(dev->blocksize, 512);
	dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);

	if (dev->use_per_node_hctx) {
		if (dev->submit_queues != nr_online_nodes)
			dev->submit_queues = nr_online_nodes;
	} else if (dev->submit_queues > nr_cpu_ids)
		dev->submit_queues = nr_cpu_ids;
	else if (dev->submit_queues == 0)
		dev->submit_queues = 1;

	dev->prev_submit_queues = dev->submit_queues;

	if (dev->poll_queues > g_poll_queues)
		dev->poll_queues = g_poll_queues;
	dev->prev_poll_queues = dev->poll_queues;
	dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);

	/* Memory backing allocates pages in the I/O path, so the queue must be allowed to block */
	if (dev->memory_backed)
		dev->blocking = true;
	else /* the write-back cache is meaningless without memory backing */
		dev->cache_size = 0;
	dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
				dev->cache_size);
	dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);

	if (dev->zoned &&
	    (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
		pr_err("zone_size must be power-of-two\n");
		return -EINVAL;
	}

	return 0;
}

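/*
 * Optional fault injection (timeouts, requeues, init_hctx failures),
 * driven by the corresponding module parameters when
 * CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION is enabled.
 */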
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static bool __null_setup_fault(struct fault_attr *attr, char *str)
{
	if (!str[0])
		return true;

	if (!setup_fault_attr(attr, str))
		return false;

	attr->verbose = 0;
	return true;
}
#endif

static bool null_setup_fault(void)
{
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
	if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
		return false;
	if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
		return false;
	if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
		return false;
#endif
	return true;
}

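/*
 * Create and register one null_blk device: validate the configuration,
 * allocate the nullb and its queues, set up the tag_set and gendisk,
 * apply the queue limits (block size, max sectors, virt boundary, discard)
 * and finally add the disk. Error paths unwind in reverse order.
 */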
static int null_add_dev(struct nullb_device *dev)
{
	struct nullb *nullb;
	int rv;

	rv = null_validate_conf(dev);
	if (rv)
		return rv;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
	if (!nullb) {
		rv = -ENOMEM;
		goto out;
	}
	nullb->dev = dev;
	dev->nullb = nullb;

	spin_lock_init(&nullb->lock);

	rv = setup_queues(nullb);
	if (rv)
		goto out_free_nullb;

	rv = null_setup_tagset(nullb);
	if (rv)
		goto out_cleanup_queues;

	nullb->disk = blk_mq_alloc_disk(nullb->tag_set, NULL, nullb);
	if (IS_ERR(nullb->disk)) {
		rv = PTR_ERR(nullb->disk);
		goto out_cleanup_tags;
	}
	nullb->q = nullb->disk->queue;

	if (dev->mbps) {
		set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
		nullb_setup_bwtimer(nullb);
	}

	if (dev->cache_size > 0) {
		set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
		blk_queue_write_cache(nullb->q, true, true);
	}

	if (dev->zoned) {
		rv = null_init_zoned_dev(dev, nullb->q);
		if (rv)
			goto out_cleanup_disk;
	}

	nullb->q->queuedata = nullb;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);

	mutex_lock(&lock);
	rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
	if (rv < 0) {
		mutex_unlock(&lock);
		goto out_cleanup_zone;
	}
	nullb->index = rv;
	dev->index = rv;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, dev->blocksize);
	blk_queue_physical_block_size(nullb->q, dev->blocksize);
	if (dev->max_sectors)
		blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);

	if (dev->virt_boundary)
		blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);

	null_config_discard(nullb);

	if (config_item_name(&dev->group.cg_item)) {
		/* Use configfs dir name as the device name */
		snprintf(nullb->disk_name, sizeof(nullb->disk_name),
			 "%s", config_item_name(&dev->group.cg_item));
	} else {
		sprintf(nullb->disk_name, "nullb%d", nullb->index);
	}

	rv = null_gendisk_register(nullb);
	if (rv)
		goto out_ida_free;

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	mutex_unlock(&lock);

	pr_info("disk %s created\n", nullb->disk_name);

	return 0;

out_ida_free:
	ida_free(&nullb_indexes, nullb->index);
out_cleanup_zone:
	null_free_zoned_dev(dev);
out_cleanup_disk:
	put_disk(nullb->disk);
out_cleanup_tags:
	if (nullb->tag_set == &nullb->__tag_set)
		blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
	kfree(nullb->queues);
out_free_nullb:
	kfree(nullb);
	dev->nullb = NULL;
out:
	return rv;
}

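/* Look up a device by disk name, e.g. "nullb0"; returns NULL if not found. */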
static struct nullb *null_find_dev_by_name(const char *name)
{
	struct nullb *nullb = NULL, *nb;

	mutex_lock(&lock);
	list_for_each_entry(nb, &nullb_list, list) {
		if (strcmp(nb->disk_name, name) == 0) {
			nullb = nb;
			break;
		}
	}
	mutex_unlock(&lock);

	return nullb;
}

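/*
 * Helpers used at module load and unload for devices created from the
 * nr_devices module parameter, as opposed to devices created through the
 * configfs interface.
 */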
static int null_create_dev(void)
{
	struct nullb_device *dev;
	int ret;

	dev = null_alloc_dev();
	if (!dev)
		return -ENOMEM;

	ret = null_add_dev(dev);
	if (ret) {
		null_free_dev(dev);
		return ret;
	}

	return 0;
}

static void null_destroy_dev(struct nullb *nullb)
{
	struct nullb_device *dev = nullb->dev;

	null_del_dev(nullb);
	null_free_device_storage(dev, false);
	null_free_dev(dev);
}

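/*
 * Module init: validate the global module parameters, register the
 * configfs subsystem and the "nullb" block major, then create the initial
 * nr_devices devices.
 *
 * Typical usage (illustrative only; the exact parameter set depends on the
 * kernel configuration):
 *
 *   modprobe null_blk nr_devices=2 bs=4096
 *
 * Additional devices can usually be created at runtime through configfs,
 * e.g.:
 *
 *   mkdir /sys/kernel/config/nullb/mydisk
 *   echo 1 > /sys/kernel/config/nullb/mydisk/power
 */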
static int __init null_init(void)
{
	int ret = 0;
	unsigned int i;
	struct nullb *nullb;

	if (g_bs > PAGE_SIZE) {
		pr_warn("invalid block size\n");
		pr_warn("defaults block size to %lu\n", PAGE_SIZE);
		g_bs = PAGE_SIZE;
	}

	if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
		pr_err("invalid home_node value\n");
		g_home_node = NUMA_NO_NODE;
	}

	if (!null_setup_fault())
		return -EINVAL;

	if (g_queue_mode == NULL_Q_RQ) {
		pr_err("legacy IO path is no longer available\n");
		return -EINVAL;
	}

	if (g_use_per_node_hctx) {
		if (g_submit_queues != nr_online_nodes) {
			pr_warn("submit_queues param is set to %u.\n",
				nr_online_nodes);
			g_submit_queues = nr_online_nodes;
		}
	} else if (g_submit_queues > nr_cpu_ids) {
		g_submit_queues = nr_cpu_ids;
	} else if (g_submit_queues <= 0) {
		g_submit_queues = 1;
	}

	config_group_init(&nullb_subsys.su_group);
	mutex_init(&nullb_subsys.su_mutex);

	ret = configfs_register_subsystem(&nullb_subsys);
	if (ret)
		return ret;

	mutex_init(&lock);

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0) {
		ret = null_major;
		goto err_conf;
	}

	for (i = 0; i < nr_devices; i++) {
		ret = null_create_dev();
		if (ret)
			goto err_dev;
	}

	pr_info("module loaded\n");
	return 0;

err_dev:
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_destroy_dev(nullb);
	}
	unregister_blkdev(null_major, "nullb");
err_conf:
	configfs_unregister_subsystem(&nullb_subsys);
	return ret;
}

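/*
 * Module exit: unregister configfs and the block major, destroy all
 * remaining devices and release the shared tag_set if it was used.
 */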
static void __exit null_exit(void)
{
	struct nullb *nullb;

	configfs_unregister_subsystem(&nullb_subsys);

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_destroy_dev(nullb);
	}
	mutex_unlock(&lock);

	if (tag_set.ops)
		blk_mq_free_tag_set(&tag_set);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
MODULE_LICENSE("GPL");