drivers/block/null_blk/main.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Add configfs and memory store: Kyungchan Koh <kkc6196@fb.com> and
 * Shaohua Li <shli@fb.com>
 */
#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/init.h>
#include "null_blk.h"

#define FREE_BATCH		16

#define TICKS_PER_SEC		50ULL
#define TIMER_INTERVAL		(NSEC_PER_SEC / TICKS_PER_SEC)

#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static DECLARE_FAULT_ATTR(null_timeout_attr);
static DECLARE_FAULT_ATTR(null_requeue_attr);
static DECLARE_FAULT_ATTR(null_init_hctx_attr);
#endif

static inline u64 mb_per_tick(int mbps)
{
	return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
}

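/*
 * Editor's note (illustrative, not part of the original source): with the
 * default TICKS_PER_SEC of 50 the bandwidth timer fires every 20 ms, and
 * mb_per_tick() returns the byte budget refilled per tick. For example,
 * mb_per_tick(100) = (1048576 / 50) * 100 = 2097100 bytes per 20 ms tick,
 * which works out to roughly 100 MiB/s.
 */
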
/*
 * Status flags for nullb_device.
 *
 * CONFIGURED:	Device has been configured and turned on. Cannot reconfigure.
 * UP:		Device is currently on and visible in userspace.
 * THROTTLED:	Device is being throttled.
 * CACHE:	Device is using a write-back cache.
 */
enum nullb_device_flags {
	NULLB_DEV_FL_CONFIGURED	= 0,
	NULLB_DEV_FL_UP		= 1,
	NULLB_DEV_FL_THROTTLED	= 2,
	NULLB_DEV_FL_CACHE	= 3,
};

#define MAP_SZ		((PAGE_SIZE >> SECTOR_SHIFT) + 2)
/*
 * nullb_page is a page in memory for nullb devices.
 *
 * @page:	The page holding the data.
 * @bitmap:	The bitmap represents which sector in the page has data.
 *		Each bit represents one block size. For example, sector 8
 *		will use the 7th bit.
 * The highest 2 bits of the bitmap are for special purposes. LOCK means the
 * cache page is being flushed to storage. FREE means the cache page is freed
 * and should be skipped from flushing to storage. Please see
 * null_make_cache_space().
 */
struct nullb_page {
	struct page *page;
	DECLARE_BITMAP(bitmap, MAP_SZ);
};
#define NULLB_PAGE_LOCK (MAP_SZ - 1)
#define NULLB_PAGE_FREE (MAP_SZ - 2)

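/*
 * Editor's note, a worked example assuming 4 KiB pages and a 512-byte block
 * size: MAP_SZ = (4096 >> 9) + 2 = 10, so the bitmap has 10 bits. Bits 0-7
 * track which of the eight 512-byte sectors in the page hold data, bit 8 is
 * NULLB_PAGE_FREE (MAP_SZ - 2) and bit 9 is NULLB_PAGE_LOCK (MAP_SZ - 1).
 */
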
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static DEFINE_IDA(nullb_indexes);
static struct blk_mq_tag_set tag_set;

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

enum {
	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};

83static bool g_virt_boundary = false;
84module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
85MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
86
b3cffc38 87static int g_no_sched;
5657a819 88module_param_named(no_sched, g_no_sched, int, 0444);
b3cffc38 89MODULE_PARM_DESC(no_sched, "No io scheduler");
90
2984c868 91static int g_submit_queues = 1;
5657a819 92module_param_named(submit_queues, g_submit_queues, int, 0444);
93MODULE_PARM_DESC(submit_queues, "Number of submission queues");
94
95static int g_poll_queues = 1;
96module_param_named(poll_queues, g_poll_queues, int, 0444);
97MODULE_PARM_DESC(poll_queues, "Number of IOPOLL submission queues");
98
2984c868 99static int g_home_node = NUMA_NO_NODE;
5657a819 100module_param_named(home_node, g_home_node, int, 0444);
101MODULE_PARM_DESC(home_node, "Home node for the device");
102
33f782c4 103#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
104/*
105 * For more details about fault injection, please refer to
106 * Documentation/fault-injection/fault-injection.rst.
107 */
93b57046 108static char g_timeout_str[80];
5657a819 109module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
290df92a 110MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
111
112static char g_requeue_str[80];
5657a819 113module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
290df92a 114MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
115
116static char g_init_hctx_str[80];
117module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
290df92a 118MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
33f782c4 119#endif
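/*
 * Editor's note: the three strings above follow the generic fault-injection
 * format described in Documentation/fault-injection/fault-injection.rst,
 * i.e. <interval>,<probability>,<space>,<times>. For example (illustrative
 * values only), loading the module with timeout="1,50,0,-1" would make
 * roughly half of all requests hit the timeout path, with no limit on how
 * many times the fault may trigger.
 */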
93b57046 120
2984c868 121static int g_queue_mode = NULL_Q_MQ;
122
123static int null_param_store_val(const char *str, int *val, int min, int max)
124{
125 int ret, new_val;
126
127 ret = kstrtoint(str, 10, &new_val);
128 if (ret)
129 return -EINVAL;
130
131 if (new_val < min || new_val > max)
132 return -EINVAL;
133
134 *val = new_val;
135 return 0;
136}
137
138static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
139{
2984c868 140 return null_param_store_val(str, &g_queue_mode, NULL_Q_BIO, NULL_Q_MQ);
141}
142
9c27847d 143static const struct kernel_param_ops null_queue_mode_param_ops = {
144 .set = null_set_queue_mode,
145 .get = param_get_int,
146};
147
5657a819 148device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
54ae81cd 149MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
f2298c04 150
2984c868 151static int g_gb = 250;
5657a819 152module_param_named(gb, g_gb, int, 0444);
153MODULE_PARM_DESC(gb, "Size in GB");
154
2984c868 155static int g_bs = 512;
5657a819 156module_param_named(bs, g_bs, int, 0444);
157MODULE_PARM_DESC(bs, "Block size (in bytes)");
158
159static int g_max_sectors;
160module_param_named(max_sectors, g_max_sectors, int, 0444);
161MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
162
f7c4ce89 163static unsigned int nr_devices = 1;
701dfc42 164module_param(nr_devices, uint, 0444);
165MODULE_PARM_DESC(nr_devices, "Number of devices to register");
166
2984c868 167static bool g_blocking;
5657a819 168module_param_named(blocking, g_blocking, bool, 0444);
169MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
170
82f402fe 171static bool shared_tags;
5657a819 172module_param(shared_tags, bool, 0444);
173MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
174
175static bool g_shared_tag_bitmap;
176module_param_named(shared_tag_bitmap, g_shared_tag_bitmap, bool, 0444);
177MODULE_PARM_DESC(shared_tag_bitmap, "Use shared tag bitmap for all submission queues for blk-mq");
178
2984c868 179static int g_irqmode = NULL_IRQ_SOFTIRQ;
180
181static int null_set_irqmode(const char *str, const struct kernel_param *kp)
182{
2984c868 183 return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
184 NULL_IRQ_TIMER);
185}
186
9c27847d 187static const struct kernel_param_ops null_irqmode_param_ops = {
188 .set = null_set_irqmode,
189 .get = param_get_int,
190};
191
5657a819 192device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
193MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
194
2984c868 195static unsigned long g_completion_nsec = 10000;
5657a819 196module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
197MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
198
2984c868 199static int g_hw_queue_depth = 64;
5657a819 200module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
201MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
202
2984c868 203static bool g_use_per_node_hctx;
5657a819 204module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
20005244 205MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
f2298c04 206
207static bool g_zoned;
208module_param_named(zoned, g_zoned, bool, S_IRUGO);
209MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
210
211static unsigned long g_zone_size = 256;
212module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
213MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
214
215static unsigned long g_zone_capacity;
216module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
217MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
218
219static unsigned int g_zone_nr_conv;
220module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
221MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
222
223static unsigned int g_zone_max_open;
224module_param_named(zone_max_open, g_zone_max_open, uint, 0444);
225MODULE_PARM_DESC(zone_max_open, "Maximum number of open zones when block device is zoned. Default: 0 (no limit)");
226
227static unsigned int g_zone_max_active;
228module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
229MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
230
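/*
 * Editor's note: an illustrative module invocation using the parameters
 * declared above (the values are examples only):
 *
 *   modprobe null_blk nr_devices=2 queue_mode=2 gb=4 bs=4096 \
 *            submit_queues=4 irqmode=1 completion_nsec=10000
 *
 * This would register two 4 GB, 4 KiB-block, multi-queue devices
 * (/dev/nullb0 and /dev/nullb1) that complete requests from softirq context.
 */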
231static struct nullb_device *null_alloc_dev(void);
232static void null_free_dev(struct nullb_device *dev);
233static void null_del_dev(struct nullb *nullb);
234static int null_add_dev(struct nullb_device *dev);
deb78b41 235static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
236
237static inline struct nullb_device *to_nullb_device(struct config_item *item)
238{
239 return item ? container_of(item, struct nullb_device, item) : NULL;
240}
241
242static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
243{
244 return snprintf(page, PAGE_SIZE, "%u\n", val);
245}
246
247static inline ssize_t nullb_device_ulong_attr_show(unsigned long val,
248 char *page)
249{
250 return snprintf(page, PAGE_SIZE, "%lu\n", val);
251}
252
253static inline ssize_t nullb_device_bool_attr_show(bool val, char *page)
254{
255 return snprintf(page, PAGE_SIZE, "%u\n", val);
256}
257
258static ssize_t nullb_device_uint_attr_store(unsigned int *val,
259 const char *page, size_t count)
260{
261 unsigned int tmp;
262 int result;
263
264 result = kstrtouint(page, 0, &tmp);
45919fbf 265 if (result < 0)
266 return result;
267
268 *val = tmp;
269 return count;
270}
271
272static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
273 const char *page, size_t count)
274{
275 int result;
276 unsigned long tmp;
277
278 result = kstrtoul(page, 0, &tmp);
45919fbf 279 if (result < 0)
280 return result;
281
282 *val = tmp;
283 return count;
284}
285
286static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
287 size_t count)
288{
289 bool tmp;
290 int result;
291
292 result = kstrtobool(page, &tmp);
45919fbf 293 if (result < 0)
294 return result;
295
296 *val = tmp;
297 return count;
298}
299
300/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
301#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
302static ssize_t \
303nullb_device_##NAME##_show(struct config_item *item, char *page) \
304{ \
305 return nullb_device_##TYPE##_attr_show( \
306 to_nullb_device(item)->NAME, page); \
307} \
308static ssize_t \
309nullb_device_##NAME##_store(struct config_item *item, const char *page, \
310 size_t count) \
311{ \
312 int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY;\
313 struct nullb_device *dev = to_nullb_device(item); \
b9853b4d 314 TYPE new_value = 0; \
315 int ret; \
316 \
317 ret = nullb_device_##TYPE##_attr_store(&new_value, page, count);\
318 if (ret < 0) \
319 return ret; \
320 if (apply_fn) \
321 ret = apply_fn(dev, new_value); \
322 else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
323 ret = -EBUSY; \
324 if (ret < 0) \
325 return ret; \
326 dev->NAME = new_value; \
327 return count; \
328} \
329CONFIGFS_ATTR(nullb_device_, NAME);
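/*
 * Editor's note: for instance, NULLB_DEVICE_ATTR(size, ulong, NULL) expands
 * to nullb_device_size_show()/nullb_device_size_store() plus a
 * nullb_device_attr_size configfs attribute; a NULL apply function means the
 * value can only be changed while the device has not yet been configured.
 */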
330
331static int nullb_update_nr_hw_queues(struct nullb_device *dev,
332 unsigned int submit_queues,
333 unsigned int poll_queues)
334
45919fbf 335{
45919fbf 336 struct blk_mq_tag_set *set;
15dfc662 337 int ret, nr_hw_queues;
45919fbf 338
15dfc662 339 if (!dev->nullb)
340 return 0;
341
15dfc662 342 /*
2bfdbe8b 343 * Make sure at least one submit queue exists.
15dfc662 344 */
2bfdbe8b 345 if (!submit_queues)
346 return -EINVAL;
347
348 /*
349 * Make sure that null_init_hctx() does not access nullb->queues[] past
350 * the end of that array.
351 */
15dfc662 352 if (submit_queues > nr_cpu_ids || poll_queues > g_poll_queues)
78b10be2 353 return -EINVAL;
354
355 /*
356 * Keep previous and new queue numbers in nullb_device for reference in
357 * the call back function null_map_queues().
358 */
359 dev->prev_submit_queues = dev->submit_queues;
360 dev->prev_poll_queues = dev->poll_queues;
361 dev->submit_queues = submit_queues;
362 dev->poll_queues = poll_queues;
363
364 set = dev->nullb->tag_set;
365 nr_hw_queues = submit_queues + poll_queues;
366 blk_mq_update_nr_hw_queues(set, nr_hw_queues);
367 ret = set->nr_hw_queues == nr_hw_queues ? 0 : -ENOMEM;
368
369 if (ret) {
370 /* on error, revert the queue numbers */
371 dev->submit_queues = dev->prev_submit_queues;
372 dev->poll_queues = dev->prev_poll_queues;
373 }
374
375 return ret;
376}
377
378static int nullb_apply_submit_queues(struct nullb_device *dev,
379 unsigned int submit_queues)
380{
381 return nullb_update_nr_hw_queues(dev, submit_queues, dev->poll_queues);
382}
383
384static int nullb_apply_poll_queues(struct nullb_device *dev,
385 unsigned int poll_queues)
386{
387 return nullb_update_nr_hw_queues(dev, dev->submit_queues, poll_queues);
388}
389
390NULLB_DEVICE_ATTR(size, ulong, NULL);
391NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
392NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
15dfc662 393NULLB_DEVICE_ATTR(poll_queues, uint, nullb_apply_poll_queues);
394NULLB_DEVICE_ATTR(home_node, uint, NULL);
395NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
396NULLB_DEVICE_ATTR(blocksize, uint, NULL);
ea17fd35 397NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
398NULLB_DEVICE_ATTR(irqmode, uint, NULL);
399NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
400NULLB_DEVICE_ATTR(index, uint, NULL);
401NULLB_DEVICE_ATTR(blocking, bool, NULL);
402NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
403NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
404NULLB_DEVICE_ATTR(discard, bool, NULL);
405NULLB_DEVICE_ATTR(mbps, uint, NULL);
406NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
407NULLB_DEVICE_ATTR(zoned, bool, NULL);
408NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
089565fb 409NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
45919fbf 410NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
411NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
412NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
cee1b215 413NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
3bf2bd20 414
415static ssize_t nullb_device_power_show(struct config_item *item, char *page)
416{
417 return nullb_device_bool_attr_show(to_nullb_device(item)->power, page);
418}
419
420static ssize_t nullb_device_power_store(struct config_item *item,
421 const char *page, size_t count)
422{
423 struct nullb_device *dev = to_nullb_device(item);
424 bool newp = false;
425 ssize_t ret;
426
427 ret = nullb_device_bool_attr_store(&newp, page, count);
428 if (ret < 0)
429 return ret;
430
431 if (!dev->power && newp) {
432 if (test_and_set_bit(NULLB_DEV_FL_UP, &dev->flags))
433 return count;
434 ret = null_add_dev(dev);
435 if (ret) {
cedcafad 436 clear_bit(NULLB_DEV_FL_UP, &dev->flags);
a75110c3 437 return ret;
438 }
439
440 set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
441 dev->power = newp;
b3c30512 442 } else if (dev->power && !newp) {
443 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
444 mutex_lock(&lock);
445 dev->power = newp;
446 null_del_dev(dev->nullb);
447 mutex_unlock(&lock);
448 }
00a8cdb8 449 clear_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
450 }
451
452 return count;
453}
454
455CONFIGFS_ATTR(nullb_device_, power);
456
457static ssize_t nullb_device_badblocks_show(struct config_item *item, char *page)
458{
459 struct nullb_device *t_dev = to_nullb_device(item);
460
461 return badblocks_show(&t_dev->badblocks, page, 0);
462}
463
464static ssize_t nullb_device_badblocks_store(struct config_item *item,
465 const char *page, size_t count)
466{
467 struct nullb_device *t_dev = to_nullb_device(item);
468 char *orig, *buf, *tmp;
469 u64 start, end;
470 int ret;
471
472 orig = kstrndup(page, count, GFP_KERNEL);
473 if (!orig)
474 return -ENOMEM;
475
476 buf = strstrip(orig);
477
478 ret = -EINVAL;
479 if (buf[0] != '+' && buf[0] != '-')
480 goto out;
481 tmp = strchr(&buf[1], '-');
482 if (!tmp)
483 goto out;
484 *tmp = '\0';
485 ret = kstrtoull(buf + 1, 0, &start);
486 if (ret)
487 goto out;
488 ret = kstrtoull(tmp + 1, 0, &end);
489 if (ret)
490 goto out;
491 ret = -EINVAL;
492 if (start > end)
493 goto out;
494 /* enable badblocks */
495 cmpxchg(&t_dev->badblocks.shift, -1, 0);
496 if (buf[0] == '+')
497 ret = badblocks_set(&t_dev->badblocks, start,
498 end - start + 1, 1);
499 else
500 ret = badblocks_clear(&t_dev->badblocks, start,
501 end - start + 1);
502 if (ret == 0)
503 ret = count;
504out:
505 kfree(orig);
506 return ret;
507}
508CONFIGFS_ATTR(nullb_device_, badblocks);
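/*
 * Editor's note: the accepted syntax is "+<start>-<end>" to mark a sector
 * range bad and "-<start>-<end>" to clear it again, for example (illustrative
 * path and range, assuming configfs is mounted at /sys/kernel/config):
 *
 *   echo "+425-472" > /sys/kernel/config/nullb/nullb0/badblocks
 */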
509
510static struct configfs_attribute *nullb_device_attrs[] = {
511 &nullb_device_attr_size,
512 &nullb_device_attr_completion_nsec,
513 &nullb_device_attr_submit_queues,
0a593fbb 514 &nullb_device_attr_poll_queues,
515 &nullb_device_attr_home_node,
516 &nullb_device_attr_queue_mode,
517 &nullb_device_attr_blocksize,
ea17fd35 518 &nullb_device_attr_max_sectors,
519 &nullb_device_attr_irqmode,
520 &nullb_device_attr_hw_queue_depth,
cedcafad 521 &nullb_device_attr_index,
522 &nullb_device_attr_blocking,
523 &nullb_device_attr_use_per_node_hctx,
cedcafad 524 &nullb_device_attr_power,
5bcd0e0c 525 &nullb_device_attr_memory_backed,
306eb6b4 526 &nullb_device_attr_discard,
eff2c4f1 527 &nullb_device_attr_mbps,
deb78b41 528 &nullb_device_attr_cache_size,
2f54a613 529 &nullb_device_attr_badblocks,
530 &nullb_device_attr_zoned,
531 &nullb_device_attr_zone_size,
089565fb 532 &nullb_device_attr_zone_capacity,
ea2c18e1 533 &nullb_device_attr_zone_nr_conv,
534 &nullb_device_attr_zone_max_open,
535 &nullb_device_attr_zone_max_active,
cee1b215 536 &nullb_device_attr_virt_boundary,
537 NULL,
538};
539
540static void nullb_device_release(struct config_item *item)
541{
542 struct nullb_device *dev = to_nullb_device(item);
543
deb78b41 544 null_free_device_storage(dev, false);
5bcd0e0c 545 null_free_dev(dev);
546}
547
548static struct configfs_item_operations nullb_device_ops = {
549 .release = nullb_device_release,
550};
551
e1919dff 552static const struct config_item_type nullb_device_type = {
553 .ct_item_ops = &nullb_device_ops,
554 .ct_attrs = nullb_device_attrs,
555 .ct_owner = THIS_MODULE,
556};
557
558static struct
559config_item *nullb_group_make_item(struct config_group *group, const char *name)
560{
561 struct nullb_device *dev;
562
563 dev = null_alloc_dev();
564 if (!dev)
565 return ERR_PTR(-ENOMEM);
566
567 config_item_init_type_name(&dev->item, name, &nullb_device_type);
568
569 return &dev->item;
570}
571
572static void
573nullb_group_drop_item(struct config_group *group, struct config_item *item)
574{
575 struct nullb_device *dev = to_nullb_device(item);
576
577 if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
578 mutex_lock(&lock);
579 dev->power = false;
580 null_del_dev(dev->nullb);
581 mutex_unlock(&lock);
582 }
583
584 config_item_put(item);
585}
586
587static ssize_t memb_group_features_show(struct config_item *item, char *page)
588{
089565fb 589 return snprintf(page, PAGE_SIZE,
cee1b215 590 "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
591}
592
593CONFIGFS_ATTR_RO(memb_group_, features);
594
595static struct configfs_attribute *nullb_group_attrs[] = {
596 &memb_group_attr_features,
597 NULL,
598};
599
600static struct configfs_group_operations nullb_group_ops = {
601 .make_item = nullb_group_make_item,
602 .drop_item = nullb_group_drop_item,
603};
604
e1919dff 605static const struct config_item_type nullb_group_type = {
606 .ct_group_ops = &nullb_group_ops,
607 .ct_attrs = nullb_group_attrs,
608 .ct_owner = THIS_MODULE,
609};
610
611static struct configfs_subsystem nullb_subsys = {
612 .su_group = {
613 .cg_item = {
614 .ci_namebuf = "nullb",
615 .ci_type = &nullb_group_type,
616 },
617 },
618};
619
620static inline int null_cache_active(struct nullb *nullb)
621{
622 return test_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
623}
624
2984c868
SL
625static struct nullb_device *null_alloc_dev(void)
626{
627 struct nullb_device *dev;
628
629 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
630 if (!dev)
631 return NULL;
5bcd0e0c 632 INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
deb78b41 633 INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
2f54a613
SL
634 if (badblocks_init(&dev->badblocks, 0)) {
635 kfree(dev);
636 return NULL;
637 }
638
2984c868
SL
639 dev->size = g_gb * 1024;
640 dev->completion_nsec = g_completion_nsec;
641 dev->submit_queues = g_submit_queues;
15dfc662 642 dev->prev_submit_queues = g_submit_queues;
0a593fbb 643 dev->poll_queues = g_poll_queues;
15dfc662 644 dev->prev_poll_queues = g_poll_queues;
2984c868
SL
645 dev->home_node = g_home_node;
646 dev->queue_mode = g_queue_mode;
647 dev->blocksize = g_bs;
ea17fd35 648 dev->max_sectors = g_max_sectors;
2984c868
SL
649 dev->irqmode = g_irqmode;
650 dev->hw_queue_depth = g_hw_queue_depth;
2984c868
SL
651 dev->blocking = g_blocking;
652 dev->use_per_node_hctx = g_use_per_node_hctx;
ca4b2a01
MB
653 dev->zoned = g_zoned;
654 dev->zone_size = g_zone_size;
089565fb 655 dev->zone_capacity = g_zone_capacity;
ea2c18e1 656 dev->zone_nr_conv = g_zone_nr_conv;
dc4d137e
NC
657 dev->zone_max_open = g_zone_max_open;
658 dev->zone_max_active = g_zone_max_active;
cee1b215 659 dev->virt_boundary = g_virt_boundary;
2984c868
SL
660 return dev;
661}
662
663static void null_free_dev(struct nullb_device *dev)
664{
1addb798
DD
665 if (!dev)
666 return;
667
d205bde7 668 null_free_zoned_dev(dev);
1addb798 669 badblocks_exit(&dev->badblocks);
2984c868
SL
670 kfree(dev);
671}
672
f2298c04
JA
673static void put_tag(struct nullb_queue *nq, unsigned int tag)
674{
675 clear_bit_unlock(tag, nq->tag_map);
676
677 if (waitqueue_active(&nq->wait))
678 wake_up(&nq->wait);
679}
680
681static unsigned int get_tag(struct nullb_queue *nq)
682{
683 unsigned int tag;
684
685 do {
686 tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
687 if (tag >= nq->queue_depth)
688 return -1U;
689 } while (test_and_set_bit_lock(tag, nq->tag_map));
690
691 return tag;
692}
693
694static void free_cmd(struct nullb_cmd *cmd)
695{
696 put_tag(cmd->nq, cmd->tag);
697}
698
3c395a96
PV
699static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
700
f2298c04
JA
701static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
702{
703 struct nullb_cmd *cmd;
704 unsigned int tag;
705
706 tag = get_tag(nq);
707 if (tag != -1U) {
708 cmd = &nq->cmds[tag];
709 cmd->tag = tag;
ff770422 710 cmd->error = BLK_STS_OK;
f2298c04 711 cmd->nq = nq;
2984c868 712 if (nq->dev->irqmode == NULL_IRQ_TIMER) {
3c395a96
PV
713 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
714 HRTIMER_MODE_REL);
715 cmd->timer.function = null_cmd_timer_expired;
716 }
f2298c04
JA
717 return cmd;
718 }
719
720 return NULL;
721}
722
3d3472f3 723static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, struct bio *bio)
f2298c04
JA
724{
725 struct nullb_cmd *cmd;
726 DEFINE_WAIT(wait);
727
f2298c04 728 do {
3d3472f3
CK
729 /*
730 * This avoids multiple return statements, multiple calls to
731 * __alloc_cmd() and a fast path call to prepare_to_wait().
732 */
f2298c04 733 cmd = __alloc_cmd(nq);
3d3472f3
CK
734 if (cmd) {
735 cmd->bio = bio;
736 return cmd;
737 }
738 prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
f2298c04 739 io_schedule();
3d3472f3 740 finish_wait(&nq->wait, &wait);
f2298c04 741 } while (1);
f2298c04
JA
742}
743
744static void end_cmd(struct nullb_cmd *cmd)
745{
2984c868 746 int queue_mode = cmd->nq->dev->queue_mode;
cf8ecc5a 747
ce2c350b
CH
748 switch (queue_mode) {
749 case NULL_Q_MQ:
5bcd0e0c 750 blk_mq_end_request(cmd->rq, cmd->error);
ce2c350b 751 return;
ce2c350b 752 case NULL_Q_BIO:
5bcd0e0c 753 cmd->bio->bi_status = cmd->error;
4246a0b6 754 bio_endio(cmd->bio);
48cc661e 755 break;
ce2c350b 756 }
f2298c04 757
48cc661e 758 free_cmd(cmd);
cf8ecc5a
AA
759}
760
761static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
762{
763 end_cmd(container_of(timer, struct nullb_cmd, timer));
f2298c04
JA
764
765 return HRTIMER_NORESTART;
766}
767
768static void null_cmd_end_timer(struct nullb_cmd *cmd)
769{
2984c868 770 ktime_t kt = cmd->nq->dev->completion_nsec;
f2298c04 771
3c395a96 772 hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
f2298c04
JA
773}
774
49f66136 775static void null_complete_rq(struct request *rq)
f2298c04 776{
49f66136 777 end_cmd(blk_mq_rq_to_pdu(rq));
f2298c04
JA
778}
779
c90b6b50 780static struct nullb_page *null_alloc_page(void)
5bcd0e0c
SL
781{
782 struct nullb_page *t_page;
783
c90b6b50 784 t_page = kmalloc(sizeof(struct nullb_page), GFP_NOIO);
5bcd0e0c 785 if (!t_page)
df00b1d2 786 return NULL;
5bcd0e0c 787
c90b6b50 788 t_page->page = alloc_pages(GFP_NOIO, 0);
df00b1d2
CK
789 if (!t_page->page) {
790 kfree(t_page);
791 return NULL;
792 }
5bcd0e0c 793
66231ad3 794 memset(t_page->bitmap, 0, sizeof(t_page->bitmap));
5bcd0e0c 795 return t_page;
5bcd0e0c
SL
796}
797
798static void null_free_page(struct nullb_page *t_page)
799{
66231ad3
ML
800 __set_bit(NULLB_PAGE_FREE, t_page->bitmap);
801 if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap))
deb78b41 802 return;
5bcd0e0c
SL
803 __free_page(t_page->page);
804 kfree(t_page);
805}
806
66231ad3
ML
807static bool null_page_empty(struct nullb_page *page)
808{
809 int size = MAP_SZ - 2;
810
811 return find_first_bit(page->bitmap, size) == size;
812}
813
deb78b41
SL
814static void null_free_sector(struct nullb *nullb, sector_t sector,
815 bool is_cache)
5bcd0e0c
SL
816{
817 unsigned int sector_bit;
818 u64 idx;
819 struct nullb_page *t_page, *ret;
820 struct radix_tree_root *root;
821
deb78b41 822 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
5bcd0e0c
SL
823 idx = sector >> PAGE_SECTORS_SHIFT;
824 sector_bit = (sector & SECTOR_MASK);
825
826 t_page = radix_tree_lookup(root, idx);
827 if (t_page) {
66231ad3 828 __clear_bit(sector_bit, t_page->bitmap);
5bcd0e0c 829
66231ad3 830 if (null_page_empty(t_page)) {
5bcd0e0c
SL
831 ret = radix_tree_delete_item(root, idx, t_page);
832 WARN_ON(ret != t_page);
833 null_free_page(ret);
deb78b41
SL
834 if (is_cache)
835 nullb->dev->curr_cache -= PAGE_SIZE;
5bcd0e0c
SL
836 }
837 }
838}
839
840static struct nullb_page *null_radix_tree_insert(struct nullb *nullb, u64 idx,
deb78b41 841 struct nullb_page *t_page, bool is_cache)
5bcd0e0c
SL
842{
843 struct radix_tree_root *root;
844
deb78b41 845 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
5bcd0e0c
SL
846
847 if (radix_tree_insert(root, idx, t_page)) {
848 null_free_page(t_page);
849 t_page = radix_tree_lookup(root, idx);
850 WARN_ON(!t_page || t_page->page->index != idx);
deb78b41
SL
851 } else if (is_cache)
852 nullb->dev->curr_cache += PAGE_SIZE;
5bcd0e0c
SL
853
854 return t_page;
855}
856
deb78b41 857static void null_free_device_storage(struct nullb_device *dev, bool is_cache)
5bcd0e0c
SL
858{
859 unsigned long pos = 0;
860 int nr_pages;
861 struct nullb_page *ret, *t_pages[FREE_BATCH];
862 struct radix_tree_root *root;
863
deb78b41 864 root = is_cache ? &dev->cache : &dev->data;
5bcd0e0c
SL
865
866 do {
867 int i;
868
869 nr_pages = radix_tree_gang_lookup(root,
870 (void **)t_pages, pos, FREE_BATCH);
871
872 for (i = 0; i < nr_pages; i++) {
873 pos = t_pages[i]->page->index;
874 ret = radix_tree_delete_item(root, pos, t_pages[i]);
875 WARN_ON(ret != t_pages[i]);
876 null_free_page(ret);
877 }
878
879 pos++;
880 } while (nr_pages == FREE_BATCH);
deb78b41
SL
881
882 if (is_cache)
883 dev->curr_cache = 0;
5bcd0e0c
SL
884}
885
deb78b41
SL
886static struct nullb_page *__null_lookup_page(struct nullb *nullb,
887 sector_t sector, bool for_write, bool is_cache)
5bcd0e0c
SL
888{
889 unsigned int sector_bit;
890 u64 idx;
891 struct nullb_page *t_page;
deb78b41 892 struct radix_tree_root *root;
5bcd0e0c
SL
893
894 idx = sector >> PAGE_SECTORS_SHIFT;
895 sector_bit = (sector & SECTOR_MASK);
896
deb78b41
SL
897 root = is_cache ? &nullb->dev->cache : &nullb->dev->data;
898 t_page = radix_tree_lookup(root, idx);
5bcd0e0c
SL
899 WARN_ON(t_page && t_page->page->index != idx);
900
66231ad3 901 if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap)))
5bcd0e0c
SL
902 return t_page;
903
904 return NULL;
905}
906
deb78b41
SL
907static struct nullb_page *null_lookup_page(struct nullb *nullb,
908 sector_t sector, bool for_write, bool ignore_cache)
909{
910 struct nullb_page *page = NULL;
911
912 if (!ignore_cache)
913 page = __null_lookup_page(nullb, sector, for_write, true);
914 if (page)
915 return page;
916 return __null_lookup_page(nullb, sector, for_write, false);
917}
918
5bcd0e0c 919static struct nullb_page *null_insert_page(struct nullb *nullb,
61884de0
JA
920 sector_t sector, bool ignore_cache)
921 __releases(&nullb->lock)
922 __acquires(&nullb->lock)
5bcd0e0c
SL
923{
924 u64 idx;
925 struct nullb_page *t_page;
926
deb78b41 927 t_page = null_lookup_page(nullb, sector, true, ignore_cache);
5bcd0e0c
SL
928 if (t_page)
929 return t_page;
930
931 spin_unlock_irq(&nullb->lock);
932
c90b6b50 933 t_page = null_alloc_page();
5bcd0e0c
SL
934 if (!t_page)
935 goto out_lock;
936
937 if (radix_tree_preload(GFP_NOIO))
938 goto out_freepage;
939
940 spin_lock_irq(&nullb->lock);
941 idx = sector >> PAGE_SECTORS_SHIFT;
942 t_page->page->index = idx;
deb78b41 943 t_page = null_radix_tree_insert(nullb, idx, t_page, !ignore_cache);
5bcd0e0c
SL
944 radix_tree_preload_end();
945
946 return t_page;
947out_freepage:
948 null_free_page(t_page);
949out_lock:
950 spin_lock_irq(&nullb->lock);
deb78b41
SL
951 return null_lookup_page(nullb, sector, true, ignore_cache);
952}
953
954static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
955{
956 int i;
957 unsigned int offset;
958 u64 idx;
959 struct nullb_page *t_page, *ret;
960 void *dst, *src;
961
962 idx = c_page->page->index;
963
964 t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true);
965
66231ad3
ML
966 __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap);
967 if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) {
deb78b41 968 null_free_page(c_page);
66231ad3 969 if (t_page && null_page_empty(t_page)) {
deb78b41
SL
970 ret = radix_tree_delete_item(&nullb->dev->data,
971 idx, t_page);
972 null_free_page(t_page);
973 }
974 return 0;
975 }
976
977 if (!t_page)
978 return -ENOMEM;
979
980 src = kmap_atomic(c_page->page);
981 dst = kmap_atomic(t_page->page);
982
983 for (i = 0; i < PAGE_SECTORS;
984 i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
66231ad3 985 if (test_bit(i, c_page->bitmap)) {
deb78b41
SL
986 offset = (i << SECTOR_SHIFT);
987 memcpy(dst + offset, src + offset,
988 nullb->dev->blocksize);
66231ad3 989 __set_bit(i, t_page->bitmap);
deb78b41
SL
990 }
991 }
992
993 kunmap_atomic(dst);
994 kunmap_atomic(src);
995
996 ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
997 null_free_page(ret);
998 nullb->dev->curr_cache -= PAGE_SIZE;
999
1000 return 0;
1001}
1002
1003static int null_make_cache_space(struct nullb *nullb, unsigned long n)
f2298c04 1004{
deb78b41
SL
1005 int i, err, nr_pages;
1006 struct nullb_page *c_pages[FREE_BATCH];
1007 unsigned long flushed = 0, one_round;
1008
1009again:
1010 if ((nullb->dev->cache_size * 1024 * 1024) >
1011 nullb->dev->curr_cache + n || nullb->dev->curr_cache == 0)
1012 return 0;
1013
1014 nr_pages = radix_tree_gang_lookup(&nullb->dev->cache,
1015 (void **)c_pages, nullb->cache_flush_pos, FREE_BATCH);
1016 /*
 1017 * null_flush_cache_page() could unlock before using the c_pages. To
 1018 * avoid the race, we don't allow the pages to be freed here.
1019 */
1020 for (i = 0; i < nr_pages; i++) {
1021 nullb->cache_flush_pos = c_pages[i]->page->index;
1022 /*
1023 * We found the page which is being flushed to disk by other
1024 * threads
1025 */
66231ad3 1026 if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap))
deb78b41
SL
1027 c_pages[i] = NULL;
1028 else
66231ad3 1029 __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap);
deb78b41
SL
1030 }
1031
1032 one_round = 0;
1033 for (i = 0; i < nr_pages; i++) {
1034 if (c_pages[i] == NULL)
1035 continue;
1036 err = null_flush_cache_page(nullb, c_pages[i]);
1037 if (err)
1038 return err;
1039 one_round++;
1040 }
1041 flushed += one_round << PAGE_SHIFT;
1042
1043 if (n > flushed) {
1044 if (nr_pages == 0)
1045 nullb->cache_flush_pos = 0;
1046 if (one_round == 0) {
1047 /* give other threads a chance */
1048 spin_unlock_irq(&nullb->lock);
1049 spin_lock_irq(&nullb->lock);
1050 }
1051 goto again;
1052 }
1053 return 0;
5bcd0e0c
SL
1054}
1055
1056static int copy_to_nullb(struct nullb *nullb, struct page *source,
deb78b41 1057 unsigned int off, sector_t sector, size_t n, bool is_fua)
5bcd0e0c
SL
1058{
1059 size_t temp, count = 0;
1060 unsigned int offset;
1061 struct nullb_page *t_page;
1062 void *dst, *src;
1063
1064 while (count < n) {
1065 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1066
deb78b41
SL
1067 if (null_cache_active(nullb) && !is_fua)
1068 null_make_cache_space(nullb, PAGE_SIZE);
1069
5bcd0e0c 1070 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
deb78b41
SL
1071 t_page = null_insert_page(nullb, sector,
1072 !null_cache_active(nullb) || is_fua);
5bcd0e0c
SL
1073 if (!t_page)
1074 return -ENOSPC;
1075
1076 src = kmap_atomic(source);
1077 dst = kmap_atomic(t_page->page);
1078 memcpy(dst + offset, src + off + count, temp);
1079 kunmap_atomic(dst);
1080 kunmap_atomic(src);
1081
66231ad3 1082 __set_bit(sector & SECTOR_MASK, t_page->bitmap);
5bcd0e0c 1083
deb78b41
SL
1084 if (is_fua)
1085 null_free_sector(nullb, sector, true);
1086
5bcd0e0c
SL
1087 count += temp;
1088 sector += temp >> SECTOR_SHIFT;
1089 }
1090 return 0;
1091}
1092
1093static int copy_from_nullb(struct nullb *nullb, struct page *dest,
1094 unsigned int off, sector_t sector, size_t n)
1095{
1096 size_t temp, count = 0;
1097 unsigned int offset;
1098 struct nullb_page *t_page;
1099 void *dst, *src;
1100
1101 while (count < n) {
1102 temp = min_t(size_t, nullb->dev->blocksize, n - count);
1103
1104 offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;
deb78b41
SL
1105 t_page = null_lookup_page(nullb, sector, false,
1106 !null_cache_active(nullb));
5bcd0e0c
SL
1107
1108 dst = kmap_atomic(dest);
1109 if (!t_page) {
1110 memset(dst + off + count, 0, temp);
1111 goto next;
1112 }
1113 src = kmap_atomic(t_page->page);
1114 memcpy(dst + off + count, src + offset, temp);
1115 kunmap_atomic(src);
1116next:
1117 kunmap_atomic(dst);
1118
1119 count += temp;
1120 sector += temp >> SECTOR_SHIFT;
1121 }
1122 return 0;
1123}
1124
dd85b492
AJ
1125static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
1126 unsigned int len, unsigned int off)
1127{
1128 void *dst;
1129
1130 dst = kmap_atomic(page);
1131 memset(dst + off, 0xFF, len);
1132 kunmap_atomic(dst);
1133}
1134
0ec4d913
DLM
1135blk_status_t null_handle_discard(struct nullb_device *dev,
1136 sector_t sector, sector_t nr_sectors)
306eb6b4 1137{
49c7089f
DLM
1138 struct nullb *nullb = dev->nullb;
1139 size_t n = nr_sectors << SECTOR_SHIFT;
306eb6b4
SL
1140 size_t temp;
1141
1142 spin_lock_irq(&nullb->lock);
1143 while (n > 0) {
49c7089f 1144 temp = min_t(size_t, n, dev->blocksize);
deb78b41
SL
1145 null_free_sector(nullb, sector, false);
1146 if (null_cache_active(nullb))
1147 null_free_sector(nullb, sector, true);
306eb6b4
SL
1148 sector += temp >> SECTOR_SHIFT;
1149 n -= temp;
1150 }
1151 spin_unlock_irq(&nullb->lock);
49c7089f
DLM
1152
1153 return BLK_STS_OK;
306eb6b4
SL
1154}
1155
deb78b41
SL
1156static int null_handle_flush(struct nullb *nullb)
1157{
1158 int err;
1159
1160 if (!null_cache_active(nullb))
1161 return 0;
1162
1163 spin_lock_irq(&nullb->lock);
1164 while (true) {
1165 err = null_make_cache_space(nullb,
1166 nullb->dev->cache_size * 1024 * 1024);
1167 if (err || nullb->dev->curr_cache == 0)
1168 break;
1169 }
1170
1171 WARN_ON(!radix_tree_empty(&nullb->dev->cache));
1172 spin_unlock_irq(&nullb->lock);
1173 return err;
1174}
1175
5bcd0e0c 1176static int null_transfer(struct nullb *nullb, struct page *page,
deb78b41
SL
1177 unsigned int len, unsigned int off, bool is_write, sector_t sector,
1178 bool is_fua)
f2298c04 1179{
dd85b492
AJ
1180 struct nullb_device *dev = nullb->dev;
1181 unsigned int valid_len = len;
5bcd0e0c
SL
1182 int err = 0;
1183
1184 if (!is_write) {
dd85b492
AJ
1185 if (dev->zoned)
1186 valid_len = null_zone_valid_read_len(nullb,
1187 sector, len);
1188
1189 if (valid_len) {
1190 err = copy_from_nullb(nullb, page, off,
1191 sector, valid_len);
1192 off += valid_len;
1193 len -= valid_len;
1194 }
1195
1196 if (len)
1197 nullb_fill_pattern(nullb, page, len, off);
5bcd0e0c
SL
1198 flush_dcache_page(page);
1199 } else {
1200 flush_dcache_page(page);
deb78b41 1201 err = copy_to_nullb(nullb, page, off, sector, len, is_fua);
5bcd0e0c
SL
1202 }
1203
1204 return err;
1205}
1206
1207static int null_handle_rq(struct nullb_cmd *cmd)
1208{
1209 struct request *rq = cmd->rq;
1210 struct nullb *nullb = cmd->nq->dev->nullb;
1211 int err;
1212 unsigned int len;
49c7089f 1213 sector_t sector = blk_rq_pos(rq);
5bcd0e0c
SL
1214 struct req_iterator iter;
1215 struct bio_vec bvec;
1216
5bcd0e0c
SL
1217 spin_lock_irq(&nullb->lock);
1218 rq_for_each_segment(bvec, rq, iter) {
1219 len = bvec.bv_len;
1220 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
deb78b41 1221 op_is_write(req_op(rq)), sector,
2d62e6b0 1222 rq->cmd_flags & REQ_FUA);
5bcd0e0c
SL
1223 if (err) {
1224 spin_unlock_irq(&nullb->lock);
1225 return err;
1226 }
1227 sector += len >> SECTOR_SHIFT;
1228 }
1229 spin_unlock_irq(&nullb->lock);
1230
1231 return 0;
1232}
1233
1234static int null_handle_bio(struct nullb_cmd *cmd)
1235{
1236 struct bio *bio = cmd->bio;
1237 struct nullb *nullb = cmd->nq->dev->nullb;
1238 int err;
1239 unsigned int len;
49c7089f 1240 sector_t sector = bio->bi_iter.bi_sector;
5bcd0e0c
SL
1241 struct bio_vec bvec;
1242 struct bvec_iter iter;
1243
5bcd0e0c
SL
1244 spin_lock_irq(&nullb->lock);
1245 bio_for_each_segment(bvec, bio, iter) {
1246 len = bvec.bv_len;
1247 err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
deb78b41 1248 op_is_write(bio_op(bio)), sector,
bf7c7a04 1249 bio->bi_opf & REQ_FUA);
5bcd0e0c
SL
1250 if (err) {
1251 spin_unlock_irq(&nullb->lock);
1252 return err;
1253 }
1254 sector += len >> SECTOR_SHIFT;
1255 }
1256 spin_unlock_irq(&nullb->lock);
1257 return 0;
1258}
1259
eff2c4f1
SL
1260static void null_stop_queue(struct nullb *nullb)
1261{
1262 struct request_queue *q = nullb->q;
1263
1264 if (nullb->dev->queue_mode == NULL_Q_MQ)
1265 blk_mq_stop_hw_queues(q);
eff2c4f1
SL
1266}
1267
1268static void null_restart_queue_async(struct nullb *nullb)
1269{
1270 struct request_queue *q = nullb->q;
eff2c4f1
SL
1271
1272 if (nullb->dev->queue_mode == NULL_Q_MQ)
1273 blk_mq_start_stopped_hw_queues(q, true);
eff2c4f1
SL
1274}
1275
adb84284
CK
1276static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
1277{
1278 struct nullb_device *dev = cmd->nq->dev;
1279 struct nullb *nullb = dev->nullb;
1280 blk_status_t sts = BLK_STS_OK;
1281 struct request *rq = cmd->rq;
1282
1283 if (!hrtimer_active(&nullb->bw_timer))
1284 hrtimer_restart(&nullb->bw_timer);
1285
1286 if (atomic_long_sub_return(blk_rq_bytes(rq), &nullb->cur_bytes) < 0) {
1287 null_stop_queue(nullb);
1288 /* race with timer */
1289 if (atomic_long_read(&nullb->cur_bytes) > 0)
1290 null_restart_queue_async(nullb);
1291 /* requeue request */
1292 sts = BLK_STS_DEV_RESOURCE;
1293 }
1294 return sts;
1295}
1296
8f94d1c1
CK
1297static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
1298 sector_t sector,
1299 sector_t nr_sectors)
1300{
1301 struct badblocks *bb = &cmd->nq->dev->badblocks;
1302 sector_t first_bad;
1303 int bad_sectors;
1304
1305 if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
1306 return BLK_STS_IOERR;
1307
1308 return BLK_STS_OK;
1309}
1310
7ea88e22 1311static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
49c7089f
DLM
1312 enum req_opf op,
1313 sector_t sector,
1314 sector_t nr_sectors)
7ea88e22
CK
1315{
1316 struct nullb_device *dev = cmd->nq->dev;
1317 int err;
1318
49c7089f
DLM
1319 if (op == REQ_OP_DISCARD)
1320 return null_handle_discard(dev, sector, nr_sectors);
1321
7ea88e22
CK
1322 if (dev->queue_mode == NULL_Q_BIO)
1323 err = null_handle_bio(cmd);
1324 else
1325 err = null_handle_rq(cmd);
1326
1327 return errno_to_blk_status(err);
1328}
1329
cecbc9ce
BVA
1330static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
1331{
1332 struct nullb_device *dev = cmd->nq->dev;
1333 struct bio *bio;
1334
1335 if (dev->memory_backed)
1336 return;
1337
1338 if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
1339 zero_fill_bio(cmd->bio);
1340 } else if (req_op(cmd->rq) == REQ_OP_READ) {
1341 __rq_for_each_bio(bio, cmd->rq)
1342 zero_fill_bio(bio);
1343 }
1344}
1345
a3d7d674
CK
1346static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
1347{
cecbc9ce
BVA
1348 /*
1349 * Since root privileges are required to configure the null_blk
1350 * driver, it is fine that this driver does not initialize the
1351 * data buffers of read commands. Zero-initialize these buffers
1352 * anyway if KMSAN is enabled to prevent that KMSAN complains
1353 * about null_blk not initializing read data buffers.
1354 */
1355 if (IS_ENABLED(CONFIG_KMSAN))
1356 nullb_zero_read_cmd_buffer(cmd);
1357
a3d7d674
CK
1358 /* Complete IO by inline, softirq or timer */
1359 switch (cmd->nq->dev->irqmode) {
1360 case NULL_IRQ_SOFTIRQ:
1361 switch (cmd->nq->dev->queue_mode) {
1362 case NULL_Q_MQ:
15f73f5b
CH
1363 if (likely(!blk_should_fake_timeout(cmd->rq->q)))
1364 blk_mq_complete_request(cmd->rq);
a3d7d674
CK
1365 break;
1366 case NULL_Q_BIO:
1367 /*
1368 * XXX: no proper submitting cpu information available.
1369 */
1370 end_cmd(cmd);
1371 break;
1372 }
1373 break;
1374 case NULL_IRQ_NONE:
1375 end_cmd(cmd);
1376 break;
1377 case NULL_IRQ_TIMER:
1378 null_cmd_end_timer(cmd);
1379 break;
1380 }
1381}
1382
9dd44c7e
DLM
1383blk_status_t null_process_cmd(struct nullb_cmd *cmd,
1384 enum req_opf op, sector_t sector,
1385 unsigned int nr_sectors)
1386{
1387 struct nullb_device *dev = cmd->nq->dev;
1388 blk_status_t ret;
1389
1390 if (dev->badblocks.shift != -1) {
1391 ret = null_handle_badblocks(cmd, sector, nr_sectors);
1392 if (ret != BLK_STS_OK)
1393 return ret;
1394 }
1395
1396 if (dev->memory_backed)
49c7089f 1397 return null_handle_memory_backed(cmd, op, sector, nr_sectors);
9dd44c7e
DLM
1398
1399 return BLK_STS_OK;
1400}
1401
d4b186ed
CK
1402static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
1403 sector_t nr_sectors, enum req_opf op)
5bcd0e0c
SL
1404{
1405 struct nullb_device *dev = cmd->nq->dev;
eff2c4f1 1406 struct nullb *nullb = dev->nullb;
adb84284 1407 blk_status_t sts;
5bcd0e0c 1408
eff2c4f1 1409 if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
adb84284
CK
1410 sts = null_handle_throttled(cmd);
1411 if (sts != BLK_STS_OK)
1412 return sts;
eff2c4f1
SL
1413 }
1414
d4b186ed
CK
1415 if (op == REQ_OP_FLUSH) {
1416 cmd->error = errno_to_blk_status(null_handle_flush(nullb));
1417 goto out;
1418 }
d4b186ed 1419
9dd44c7e 1420 if (dev->zoned)
de3510e5 1421 sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
9dd44c7e 1422 else
de3510e5
DLM
1423 sts = null_process_cmd(cmd, op, sector, nr_sectors);
1424
1425 /* Do not overwrite errors (e.g. timeout errors) */
1426 if (cmd->error == BLK_STS_OK)
1427 cmd->error = sts;
fceb5d1b 1428
2f54a613 1429out:
a3d7d674 1430 nullb_complete_cmd(cmd);
5bcd0e0c 1431 return BLK_STS_OK;
f2298c04
JA
1432}
1433
eff2c4f1
SL
1434static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
1435{
1436 struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
1437 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1438 unsigned int mbps = nullb->dev->mbps;
1439
1440 if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
1441 return HRTIMER_NORESTART;
1442
1443 atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
1444 null_restart_queue_async(nullb);
1445
1446 hrtimer_forward_now(&nullb->bw_timer, timer_interval);
1447
1448 return HRTIMER_RESTART;
1449}
1450
1451static void nullb_setup_bwtimer(struct nullb *nullb)
1452{
1453 ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
1454
1455 hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1456 nullb->bw_timer.function = nullb_bwtimer_fn;
1457 atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
1458 hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
f2298c04
JA
1459}
1460
1461static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
1462{
1463 int index = 0;
1464
1465 if (nullb->nr_queues != 1)
1466 index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
1467
1468 return &nullb->queues[index];
1469}
1470
3e08773c 1471static void null_submit_bio(struct bio *bio)
f2298c04 1472{
d4b186ed
CK
1473 sector_t sector = bio->bi_iter.bi_sector;
1474 sector_t nr_sectors = bio_sectors(bio);
309dca30 1475 struct nullb *nullb = bio->bi_bdev->bd_disk->private_data;
f2298c04 1476 struct nullb_queue *nq = nullb_to_queue(nullb);
f2298c04 1477
3d3472f3 1478 null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
f2298c04
JA
1479}
1480
93b57046
JA
1481static bool should_timeout_request(struct request *rq)
1482{
33f782c4 1483#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
93b57046
JA
1484 if (g_timeout_str[0])
1485 return should_fail(&null_timeout_attr, 1);
33f782c4 1486#endif
24941b90
JA
1487 return false;
1488}
93b57046 1489
24941b90
JA
1490static bool should_requeue_request(struct request *rq)
1491{
1492#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1493 if (g_requeue_str[0])
1494 return should_fail(&null_requeue_attr, 1);
1495#endif
93b57046
JA
1496 return false;
1497}
1498
1499static int null_map_queues(struct blk_mq_tag_set *set)
1500{
1501 struct nullb *nullb = set->driver_data;
1502 int i, qoff;
1503 unsigned int submit_queues = g_submit_queues;
1504 unsigned int poll_queues = g_poll_queues;
1505
1506 if (nullb) {
1507 struct nullb_device *dev = nullb->dev;
1508
1509 /*
 1510 * Refer to nr_hw_queues of the tag set to check whether the expected
 1511 * number of hardware queues has been prepared. If the block layer failed
 1512 * to prepare them, use the previous numbers of submit queues and
1513 * poll queues to map queues.
1514 */
1515 if (set->nr_hw_queues ==
1516 dev->submit_queues + dev->poll_queues) {
1517 submit_queues = dev->submit_queues;
1518 poll_queues = dev->poll_queues;
1519 } else if (set->nr_hw_queues ==
1520 dev->prev_submit_queues + dev->prev_poll_queues) {
1521 submit_queues = dev->prev_submit_queues;
1522 poll_queues = dev->prev_poll_queues;
1523 } else {
1524 pr_warn("tag set has unexpected nr_hw_queues: %d\n",
1525 set->nr_hw_queues);
1526 return -EINVAL;
1527 }
1528 }
1529
1530 for (i = 0, qoff = 0; i < set->nr_maps; i++) {
1531 struct blk_mq_queue_map *map = &set->map[i];
1532
1533 switch (i) {
1534 case HCTX_TYPE_DEFAULT:
15dfc662 1535 map->nr_queues = submit_queues;
0a593fbb
JA
1536 break;
1537 case HCTX_TYPE_READ:
1538 map->nr_queues = 0;
1539 continue;
1540 case HCTX_TYPE_POLL:
15dfc662 1541 map->nr_queues = poll_queues;
1542 break;
1543 }
1544 map->queue_offset = qoff;
1545 qoff += map->nr_queues;
1546 blk_mq_map_queues(map);
1547 }
1548
1549 return 0;
1550}
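/*
 * Editor's note, illustrating the mapping above: with submit_queues=4 and
 * poll_queues=2 the tag set has 6 hardware queues; HCTX_TYPE_DEFAULT is
 * given queues 0-3, HCTX_TYPE_READ gets none (reads share the default
 * queues), and HCTX_TYPE_POLL is given queues 4-5.
 */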
1551
1552static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
1553{
1554 struct nullb_queue *nq = hctx->driver_data;
1555 LIST_HEAD(list);
1556 int nr = 0;
1557
1558 spin_lock(&nq->poll_lock);
1559 list_splice_init(&nq->poll_list, &list);
1560 spin_unlock(&nq->poll_lock);
1561
1562 while (!list_empty(&list)) {
1563 struct nullb_cmd *cmd;
1564 struct request *req;
1565
1566 req = list_first_entry(&list, struct request, queuelist);
1567 list_del_init(&req->queuelist);
1568 cmd = blk_mq_rq_to_pdu(req);
1569 cmd->error = null_process_cmd(cmd, req_op(req), blk_rq_pos(req),
1570 blk_rq_sectors(req));
c5eafd79 1571 if (!blk_mq_add_to_batch(req, iob, (__force int) cmd->error,
2385ebf3
ML
1572 blk_mq_end_request_batch))
1573 end_cmd(cmd);
0a593fbb
JA
1574 nr++;
1575 }
1576
1577 return nr;
1578}
1579
5448aca4
JA
1580static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
1581{
0a593fbb 1582 struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
de3510e5
DLM
1583 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
1584
9c7eddf1 1585 pr_info("rq %p timed out\n", rq);
de3510e5 1586
0a593fbb
JA
1587 if (hctx->type == HCTX_TYPE_POLL) {
1588 struct nullb_queue *nq = hctx->driver_data;
1589
1590 spin_lock(&nq->poll_lock);
1591 list_del_init(&rq->queuelist);
1592 spin_unlock(&nq->poll_lock);
1593 }
1594
de3510e5
DLM
1595 /*
1596 * If the device is marked as blocking (i.e. memory backed or zoned
1597 * device), the submission path may be blocked waiting for resources
1598 * and cause real timeouts. For these real timeouts, the submission
1599 * path will complete the request using blk_mq_complete_request().
1600 * Only fake timeouts need to execute blk_mq_complete_request() here.
1601 */
1602 cmd->error = BLK_STS_TIMEOUT;
1603 if (cmd->fake_timeout)
1604 blk_mq_complete_request(rq);
0df0bb08 1605 return BLK_EH_DONE;
5448aca4
JA
1606}
1607
fc17b653 1608static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
74c45052 1609 const struct blk_mq_queue_data *bd)
f2298c04 1610{
74c45052 1611 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
2984c868 1612 struct nullb_queue *nq = hctx->driver_data;
d4b186ed
CK
1613 sector_t nr_sectors = blk_rq_sectors(bd->rq);
1614 sector_t sector = blk_rq_pos(bd->rq);
0a593fbb 1615 const bool is_poll = hctx->type == HCTX_TYPE_POLL;
f2298c04 1616
db5bcf87
JA
1617 might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
1618
0a593fbb 1619 if (!is_poll && nq->dev->irqmode == NULL_IRQ_TIMER) {
3c395a96
PV
1620 hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1621 cmd->timer.function = null_cmd_timer_expired;
1622 }
74c45052 1623 cmd->rq = bd->rq;
ff770422 1624 cmd->error = BLK_STS_OK;
2984c868 1625 cmd->nq = nq;
de3510e5 1626 cmd->fake_timeout = should_timeout_request(bd->rq);
f2298c04 1627
74c45052 1628 blk_mq_start_request(bd->rq);
e2490073 1629
24941b90
JA
1630 if (should_requeue_request(bd->rq)) {
1631 /*
1632 * Alternate between hitting the core BUSY path, and the
1633 * driver driven requeue path
1634 */
1635 nq->requeue_selection++;
1636 if (nq->requeue_selection & 1)
1637 return BLK_STS_RESOURCE;
1638 else {
1639 blk_mq_requeue_request(bd->rq, true);
1640 return BLK_STS_OK;
1641 }
1642 }
0a593fbb
JA
1643
1644 if (is_poll) {
1645 spin_lock(&nq->poll_lock);
1646 list_add_tail(&bd->rq->queuelist, &nq->poll_list);
1647 spin_unlock(&nq->poll_lock);
1648 return BLK_STS_OK;
1649 }
de3510e5 1650 if (cmd->fake_timeout)
24941b90 1651 return BLK_STS_OK;
93b57046 1652
d4b186ed 1653 return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
f2298c04
JA
1654}
1655
de65d2d2
MB
1656static void cleanup_queue(struct nullb_queue *nq)
1657{
1658 kfree(nq->tag_map);
1659 kfree(nq->cmds);
1660}
1661
1662static void cleanup_queues(struct nullb *nullb)
1663{
1664 int i;
1665
1666 for (i = 0; i < nullb->nr_queues; i++)
1667 cleanup_queue(&nullb->queues[i]);
1668
1669 kfree(nullb->queues);
1670}
1671
78b10be2
BVA
1672static void null_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
1673{
1674 struct nullb_queue *nq = hctx->driver_data;
1675 struct nullb *nullb = nq->dev->nullb;
1676
1677 nullb->nr_queues--;
1678}
1679
1680static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
1681{
1682 init_waitqueue_head(&nq->wait);
1683 nq->queue_depth = nullb->queue_depth;
1684 nq->dev = nullb->dev;
0a593fbb
JA
1685 INIT_LIST_HEAD(&nq->poll_list);
1686 spin_lock_init(&nq->poll_lock);
78b10be2
BVA
1687}
1688
1689static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
1690 unsigned int hctx_idx)
1691{
1692 struct nullb *nullb = hctx->queue->queuedata;
1693 struct nullb_queue *nq;
1694
596444e7
BVA
1695#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
1696 if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
1697 return -EFAULT;
1698#endif
1699
78b10be2
BVA
1700 nq = &nullb->queues[hctx_idx];
1701 hctx->driver_data = nq;
1702 null_init_queue(nullb, nq);
1703 nullb->nr_queues++;
1704
1705 return 0;
1706}
1707
1708static const struct blk_mq_ops null_mq_ops = {
1709 .queue_rq = null_queue_rq,
1710 .complete = null_complete_rq,
1711 .timeout = null_timeout_rq,
0a593fbb
JA
1712 .poll = null_poll,
1713 .map_queues = null_map_queues,
78b10be2
BVA
1714 .init_hctx = null_init_hctx,
1715 .exit_hctx = null_exit_hctx,
1716};
1717
9ae2d0aa
MB
1718static void null_del_dev(struct nullb *nullb)
1719{
9b03b713
BVA
1720 struct nullb_device *dev;
1721
1722 if (!nullb)
1723 return;
1724
1725 dev = nullb->dev;
2984c868 1726
94bc02e3
SL
1727 ida_simple_remove(&nullb_indexes, nullb->index);
1728
9ae2d0aa
MB
1729 list_del_init(&nullb->list);
1730
74ede5af 1731 del_gendisk(nullb->disk);
eff2c4f1
SL
1732
1733 if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
1734 hrtimer_cancel(&nullb->bw_timer);
1735 atomic_long_set(&nullb->cur_bytes, LONG_MAX);
1736 null_restart_queue_async(nullb);
1737 }
1738
132226b3 1739 blk_cleanup_disk(nullb->disk);
2984c868
SL
1740 if (dev->queue_mode == NULL_Q_MQ &&
1741 nullb->tag_set == &nullb->__tag_set)
82f402fe 1742 blk_mq_free_tag_set(nullb->tag_set);
9ae2d0aa 1743 cleanup_queues(nullb);
deb78b41
SL
1744 if (null_cache_active(nullb))
1745 null_free_device_storage(nullb->dev, true);
9ae2d0aa 1746 kfree(nullb);
2984c868 1747 dev->nullb = NULL;
9ae2d0aa
MB
1748}
1749
306eb6b4
SL
1750static void null_config_discard(struct nullb *nullb)
1751{
1752 if (nullb->dev->discard == false)
1753 return;
1592cd15 1754
49c7089f
DLM
1755 if (!nullb->dev->memory_backed) {
1756 nullb->dev->discard = false;
1757 pr_info("discard option is ignored without memory backing\n");
1758 return;
1759 }
1760
1592cd15
CK
1761 if (nullb->dev->zoned) {
1762 nullb->dev->discard = false;
1763 pr_info("discard option is ignored in zoned mode\n");
1764 return;
1765 }
1766
306eb6b4
SL
1767 nullb->q->limits.discard_granularity = nullb->dev->blocksize;
1768 nullb->q->limits.discard_alignment = nullb->dev->blocksize;
1769 blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
8b904b5b 1770 blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
9ae2d0aa
MB
1771}
1772
c62b37d9
CH
1773static const struct block_device_operations null_bio_ops = {
1774 .owner = THIS_MODULE,
1775 .submit_bio = null_submit_bio,
1776 .report_zones = null_report_zones,
1777};
1778
1779static const struct block_device_operations null_rq_ops = {
e3f89564 1780 .owner = THIS_MODULE,
7fc8fb51 1781 .report_zones = null_report_zones,
f2298c04
JA
1782};
1783
static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		cmd->tag = -1U;
	}

	return 0;
}

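/*
 * Allocate the nullb_queue array, sized for every possible CPU plus any
 * configured poll queues, and record the hardware queue depth.
 */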
static int setup_queues(struct nullb *nullb)
{
	int nqueues = nr_cpu_ids;

	if (g_poll_queues)
		nqueues += g_poll_queues;

	nullb->queues = kcalloc(nqueues, sizeof(struct nullb_queue),
				GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->queue_depth = nullb->dev->hw_queue_depth;
	return 0;
}

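/*
 * Initialize the submission queues and their command arrays for the
 * bio-based path; in this driver it is only called from the NULL_Q_BIO
 * branch of null_add_dev, since blk-mq devices set up their queues via
 * ->init_hctx instead.
 */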
static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < nullb->dev->submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			return ret;
		nullb->nr_queues++;
	}
	return 0;
}

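/*
 * Fill in the gendisk (capacity, major/minor, fops, name), register the
 * zoned model first when the device is zoned, then add the disk to the
 * system.
 */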
static int null_gendisk_register(struct nullb *nullb)
{
	sector_t size = ((sector_t)nullb->dev->size * SZ_1M) >> SECTOR_SHIFT;
	struct gendisk *disk = nullb->disk;

	set_capacity(disk, size);

	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->minors		= 1;
	if (queue_is_mq(nullb->q))
		disk->fops	= &null_rq_ops;
	else
		disk->fops	= &null_bio_ops;
	disk->private_data	= nullb;
	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);

	if (nullb->dev->zoned) {
		int ret = null_register_zoned_dev(nullb);

		if (ret)
			return ret;
	}

	return add_disk(disk);
}

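/*
 * Populate and allocate a blk-mq tag set. @nullb may be NULL when the
 * shared tag set is set up at module load time, in which case the global
 * module parameters are used instead of per-device settings.
 */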
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{
	int poll_queues;

	set->ops = &null_mq_ops;
	set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
						g_submit_queues;
	poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
	if (poll_queues)
		set->nr_hw_queues += poll_queues;
	set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
						g_hw_queue_depth;
	set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
	set->cmd_size	= sizeof(struct nullb_cmd);
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	if (g_no_sched)
		set->flags |= BLK_MQ_F_NO_SCHED;
	if (g_shared_tag_bitmap)
		set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
	set->driver_data = nullb;
	if (poll_queues)
		set->nr_maps = 3;
	else
		set->nr_maps = 1;

	if ((nullb && nullb->dev->blocking) || g_blocking)
		set->flags |= BLK_MQ_F_BLOCKING;

	return blk_mq_alloc_tag_set(set);
}

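/*
 * Sanitize a device configuration before it is instantiated: clamp block
 * size and queue counts, force blocking mode for memory-backed devices,
 * drop the cache when there is no memory backing, cap the throttling rate,
 * and reject zoned devices whose zone size is not a power of two.
 */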
static int null_validate_conf(struct nullb_device *dev)
{
	dev->blocksize = round_down(dev->blocksize, 512);
	dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);

	if (dev->queue_mode == NULL_Q_MQ && dev->use_per_node_hctx) {
		if (dev->submit_queues != nr_online_nodes)
			dev->submit_queues = nr_online_nodes;
	} else if (dev->submit_queues > nr_cpu_ids)
		dev->submit_queues = nr_cpu_ids;
	else if (dev->submit_queues == 0)
		dev->submit_queues = 1;

	dev->prev_submit_queues = dev->submit_queues;

	if (dev->poll_queues > g_poll_queues)
		dev->poll_queues = g_poll_queues;
	dev->prev_poll_queues = dev->poll_queues;

	dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
	dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);

	/* Do memory allocation, so set blocking */
	if (dev->memory_backed)
		dev->blocking = true;
	else /* cache is meaningless */
		dev->cache_size = 0;
	dev->cache_size = min_t(unsigned long, ULONG_MAX / 1024 / 1024,
						dev->cache_size);
	dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
	/* can not stop a queue */
	if (dev->queue_mode == NULL_Q_BIO)
		dev->mbps = 0;

	if (dev->zoned &&
	    (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
		pr_err("zone_size must be power-of-two\n");
		return -EINVAL;
	}

	return 0;
}

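/*
 * Parse the timeout/requeue/init_hctx fault-injection strings supplied as
 * module parameters. An empty string leaves the corresponding attribute
 * unused; a malformed one makes device creation fail.
 */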
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static bool __null_setup_fault(struct fault_attr *attr, char *str)
{
	if (!str[0])
		return true;

	if (!setup_fault_attr(attr, str))
		return false;

	attr->verbose = 0;
	return true;
}
#endif

static bool null_setup_fault(void)
{
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
	if (!__null_setup_fault(&null_timeout_attr, g_timeout_str))
		return false;
	if (!__null_setup_fault(&null_requeue_attr, g_requeue_str))
		return false;
	if (!__null_setup_fault(&null_init_hctx_attr, g_init_hctx_str))
		return false;
#endif
	return true;
}

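/*
 * Create one null_blk device from a validated configuration: allocate the
 * nullb and its queues, set up either the blk-mq or the bio-based path,
 * arm bandwidth throttling and the write-back cache when requested,
 * configure queue limits and discard, and finally register the gendisk.
 * Errors unwind in reverse order of setup.
 */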
static int null_add_dev(struct nullb_device *dev)
{
	struct nullb *nullb;
	int rv;

	rv = null_validate_conf(dev);
	if (rv)
		return rv;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, dev->home_node);
	if (!nullb) {
		rv = -ENOMEM;
		goto out;
	}
	nullb->dev = dev;
	dev->nullb = nullb;

	spin_lock_init(&nullb->lock);

	rv = setup_queues(nullb);
	if (rv)
		goto out_free_nullb;

	if (dev->queue_mode == NULL_Q_MQ) {
		if (shared_tags) {
			nullb->tag_set = &tag_set;
			rv = 0;
		} else {
			nullb->tag_set = &nullb->__tag_set;
			rv = null_init_tag_set(nullb, nullb->tag_set);
		}

		if (rv)
			goto out_cleanup_queues;

		if (!null_setup_fault())
			goto out_cleanup_tags;

		nullb->tag_set->timeout = 5 * HZ;
		nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
		if (IS_ERR(nullb->disk)) {
			rv = PTR_ERR(nullb->disk);
			goto out_cleanup_tags;
		}
		nullb->q = nullb->disk->queue;
	} else if (dev->queue_mode == NULL_Q_BIO) {
		rv = -ENOMEM;
		nullb->disk = blk_alloc_disk(nullb->dev->home_node);
		if (!nullb->disk)
			goto out_cleanup_queues;

		nullb->q = nullb->disk->queue;
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_disk;
	}

	if (dev->mbps) {
		set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
		nullb_setup_bwtimer(nullb);
	}

	if (dev->cache_size > 0) {
		set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
		blk_queue_write_cache(nullb->q, true, true);
	}

	if (dev->zoned) {
		rv = null_init_zoned_dev(dev, nullb->q);
		if (rv)
			goto out_cleanup_disk;
	}

	nullb->q->queuedata = nullb;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);

	mutex_lock(&lock);
	nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
	dev->index = nullb->index;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, dev->blocksize);
	blk_queue_physical_block_size(nullb->q, dev->blocksize);
	if (!dev->max_sectors)
		dev->max_sectors = queue_max_hw_sectors(nullb->q);
	dev->max_sectors = min_t(unsigned int, dev->max_sectors,
				 BLK_DEF_MAX_SECTORS);
	blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);

	if (dev->virt_boundary)
		blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);

	null_config_discard(nullb);

	sprintf(nullb->disk_name, "nullb%d", nullb->index);

	rv = null_gendisk_register(nullb);
	if (rv)
		goto out_cleanup_zone;

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	mutex_unlock(&lock);

	return 0;
out_cleanup_zone:
	null_free_zoned_dev(dev);
out_cleanup_disk:
	blk_cleanup_disk(nullb->disk);
out_cleanup_tags:
	if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
		blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
	cleanup_queues(nullb);
out_free_nullb:
	kfree(nullb);
	dev->nullb = NULL;
out:
	return rv;
}

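/*
 * Module init: sanitize the global parameters, optionally allocate the
 * shared tag set, register the configfs subsystem and the block major,
 * then create nr_devices devices. On failure, everything created so far
 * is torn down again.
 */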
static int __init null_init(void)
{
	int ret = 0;
	unsigned int i;
	struct nullb *nullb;
	struct nullb_device *dev;

	if (g_bs > PAGE_SIZE) {
		pr_warn("invalid block size\n");
		pr_warn("defaults block size to %lu\n", PAGE_SIZE);
		g_bs = PAGE_SIZE;
	}

	if (g_max_sectors > BLK_DEF_MAX_SECTORS) {
		pr_warn("invalid max sectors\n");
		pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS);
		g_max_sectors = BLK_DEF_MAX_SECTORS;
	}

	if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) {
		pr_err("invalid home_node value\n");
		g_home_node = NUMA_NO_NODE;
	}

	if (g_queue_mode == NULL_Q_RQ) {
		pr_err("legacy IO path no longer available\n");
		return -EINVAL;
	}
	if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
		if (g_submit_queues != nr_online_nodes) {
			pr_warn("submit_queues param is set to %u.\n",
				nr_online_nodes);
			g_submit_queues = nr_online_nodes;
		}
	} else if (g_submit_queues > nr_cpu_ids)
		g_submit_queues = nr_cpu_ids;
	else if (g_submit_queues <= 0)
		g_submit_queues = 1;

	if (g_queue_mode == NULL_Q_MQ && shared_tags) {
		ret = null_init_tag_set(NULL, &tag_set);
		if (ret)
			return ret;
	}

	config_group_init(&nullb_subsys.su_group);
	mutex_init(&nullb_subsys.su_mutex);

	ret = configfs_register_subsystem(&nullb_subsys);
	if (ret)
		goto err_tagset;

	mutex_init(&lock);

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0) {
		ret = null_major;
		goto err_conf;
	}

	for (i = 0; i < nr_devices; i++) {
		dev = null_alloc_dev();
		if (!dev) {
			ret = -ENOMEM;
			goto err_dev;
		}
		ret = null_add_dev(dev);
		if (ret) {
			null_free_dev(dev);
			goto err_dev;
		}
	}

	pr_info("module loaded\n");
	return 0;

err_dev:
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		dev = nullb->dev;
		null_del_dev(nullb);
		null_free_dev(dev);
	}
	unregister_blkdev(null_major, "nullb");
err_conf:
	configfs_unregister_subsystem(&nullb_subsys);
err_tagset:
	if (g_queue_mode == NULL_Q_MQ && shared_tags)
		blk_mq_free_tag_set(&tag_set);
	return ret;
}

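/*
 * Module exit: unregister configfs and the block major, delete every
 * remaining device, and release the shared tag set if one was allocated.
 */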
static void __exit null_exit(void)
{
	struct nullb *nullb;

	configfs_unregister_subsystem(&nullb_subsys);

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		struct nullb_device *dev;

		nullb = list_entry(nullb_list.next, struct nullb, list);
		dev = nullb->dev;
		null_del_dev(nullb);
		null_free_dev(dev);
	}
	mutex_unlock(&lock);

	if (g_queue_mode == NULL_Q_MQ && shared_tags)
		blk_mq_free_tag_set(&tag_set);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
MODULE_LICENSE("GPL");
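/*
 * Illustrative usage sketch (not part of the driver): devices can be
 * created either via module parameters at load time or at runtime through
 * the configfs subsystem registered in null_init(). Assuming the standard
 * null_blk configfs attributes, something like:
 *
 *   modprobe null_blk nr_devices=1 queue_mode=2
 *   mkdir /sys/kernel/config/nullb/nullb1
 *   echo 1 > /sys/kernel/config/nullb/nullb1/memory_backed
 *   echo 1 > /sys/kernel/config/nullb/nullb1/power
 *
 * The second device is only instantiated (null_add_dev()) when "power" is
 * written; see Documentation/block/null_blk.rst for the full attribute
 * list.
 */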