Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
9db5579b NP |
2 | /* |
3 | * Ram backed block device driver. | |
4 | * | |
5 | * Copyright (C) 2007 Nick Piggin | |
6 | * Copyright (C) 2007 Novell Inc. | |
7 | * | |
8 | * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright | |
9 | * of their respective owners. | |
10 | */ | |
11 | ||
12 | #include <linux/init.h> | |
287f3ca5 | 13 | #include <linux/initrd.h> |
9db5579b NP |
14 | #include <linux/module.h> |
15 | #include <linux/moduleparam.h> | |
16 | #include <linux/major.h> | |
17 | #include <linux/blkdev.h> | |
18 | #include <linux/bio.h> | |
19 | #include <linux/highmem.h> | |
2a48fc0a | 20 | #include <linux/mutex.h> |
4ee60ec1 | 21 | #include <linux/pagemap.h> |
786bb024 | 22 | #include <linux/xarray.h> |
ff01bb48 | 23 | #include <linux/fs.h> |
5a0e3ad6 | 24 | #include <linux/slab.h> |
23c47d2a | 25 | #include <linux/backing-dev.h> |
f4be591f | 26 | #include <linux/debugfs.h> |
9db5579b | 27 | |
7c0f6ba6 | 28 | #include <linux/uaccess.h> |
9db5579b | 29 | |
9db5579b | 30 | /* |
786bb024 | 31 | * Each block ramdisk device has a xarray brd_pages of pages that stores |
9db5579b NP |
32 | * the pages containing the block device's contents. A brd page's ->index is |
33 | * its offset in PAGE_SIZE units. This is similar to, but in no way connected | |
34 | * with, the kernel's pagecache or buffer cache (which sit above our block | |
35 | * device). | |
36 | */ | |
37 | struct brd_device { | |
7f9b348c | 38 | int brd_number; |
9db5579b NP |
39 | struct gendisk *brd_disk; |
40 | struct list_head brd_list; | |
41 | ||
42 | /* | |
786bb024 | 43 | * Backing store of pages. This is the contents of the block device. |
9db5579b | 44 | */ |
786bb024 | 45 | struct xarray brd_pages; |
f4be591f | 46 | u64 brd_nr_pages; |
9db5579b NP |
47 | }; |
48 | ||
49 | /* | |
50 | * Look up and return a brd's page for a given sector. | |
51 | */ | |
52 | static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) | |
53 | { | |
54 | pgoff_t idx; | |
55 | struct page *page; | |
56 | ||
9db5579b | 57 | idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ |
786bb024 | 58 | page = xa_load(&brd->brd_pages, idx); |
9db5579b NP |
59 | |
60 | BUG_ON(page && page->index != idx); | |
61 | ||
62 | return page; | |
63 | } | |
64 | ||
65 | /* | |
db0ccc44 | 66 | * Insert a new page for a given sector, if one does not already exist. |
9db5579b | 67 | */ |
6ded703c | 68 | static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) |
9db5579b NP |
69 | { |
70 | pgoff_t idx; | |
786bb024 | 71 | struct page *page, *cur; |
6ded703c | 72 | int ret = 0; |
9db5579b NP |
73 | |
74 | page = brd_lookup_page(brd, sector); | |
75 | if (page) | |
db0ccc44 | 76 | return 0; |
9db5579b | 77 | |
6ded703c | 78 | page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); |
9db5579b | 79 | if (!page) |
db0ccc44 | 80 | return -ENOMEM; |
9db5579b | 81 | |
786bb024 | 82 | xa_lock(&brd->brd_pages); |
9db5579b | 83 | |
9db5579b | 84 | idx = sector >> PAGE_SECTORS_SHIFT; |
dfd20b2b | 85 | page->index = idx; |
786bb024 PR |
86 | |
87 | cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp); | |
88 | ||
89 | if (unlikely(cur)) { | |
9db5579b | 90 | __free_page(page); |
786bb024 PR |
91 | ret = xa_err(cur); |
92 | if (!ret && (cur->index != idx)) | |
6ded703c | 93 | ret = -EIO; |
f4be591f CO |
94 | } else { |
95 | brd->brd_nr_pages++; | |
dfd20b2b | 96 | } |
9db5579b | 97 | |
786bb024 PR |
98 | xa_unlock(&brd->brd_pages); |
99 | ||
6ded703c | 100 | return ret; |
9db5579b NP |
101 | } |
102 | ||
103 | /* | |
786bb024 | 104 | * Free all backing store pages and xarray. This must only be called when |
9db5579b NP |
105 | * there are no other users of the device. |
106 | */ | |
9db5579b NP |
107 | static void brd_free_pages(struct brd_device *brd) |
108 | { | |
786bb024 PR |
109 | struct page *page; |
110 | pgoff_t idx; | |
9db5579b | 111 | |
786bb024 PR |
112 | xa_for_each(&brd->brd_pages, idx, page) { |
113 | __free_page(page); | |
6dd4423f | 114 | cond_resched(); |
786bb024 | 115 | } |
936b33f7 | 116 | |
786bb024 | 117 | xa_destroy(&brd->brd_pages); |
9db5579b NP |
118 | } |
119 | ||
120 | /* | |
121 | * copy_to_brd_setup must be called before copy_to_brd. It may sleep. | |
122 | */ | |
6ded703c JA |
123 | static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, |
124 | gfp_t gfp) | |
9db5579b NP |
125 | { |
126 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | |
127 | size_t copy; | |
db0ccc44 | 128 | int ret; |
9db5579b NP |
129 | |
130 | copy = min_t(size_t, n, PAGE_SIZE - offset); | |
6ded703c | 131 | ret = brd_insert_page(brd, sector, gfp); |
db0ccc44 JA |
132 | if (ret) |
133 | return ret; | |
9db5579b NP |
134 | if (copy < n) { |
135 | sector += copy >> SECTOR_SHIFT; | |
6ded703c | 136 | ret = brd_insert_page(brd, sector, gfp); |
9db5579b | 137 | } |
db0ccc44 | 138 | return ret; |
9db5579b NP |
139 | } |
140 | ||
141 | /* | |
142 | * Copy n bytes from src to the brd starting at sector. Does not sleep. | |
143 | */ | |
144 | static void copy_to_brd(struct brd_device *brd, const void *src, | |
145 | sector_t sector, size_t n) | |
146 | { | |
147 | struct page *page; | |
148 | void *dst; | |
149 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | |
150 | size_t copy; | |
151 | ||
152 | copy = min_t(size_t, n, PAGE_SIZE - offset); | |
153 | page = brd_lookup_page(brd, sector); | |
154 | BUG_ON(!page); | |
155 | ||
cfd8005c | 156 | dst = kmap_atomic(page); |
9db5579b | 157 | memcpy(dst + offset, src, copy); |
cfd8005c | 158 | kunmap_atomic(dst); |
9db5579b NP |
159 | |
160 | if (copy < n) { | |
161 | src += copy; | |
162 | sector += copy >> SECTOR_SHIFT; | |
163 | copy = n - copy; | |
164 | page = brd_lookup_page(brd, sector); | |
165 | BUG_ON(!page); | |
166 | ||
cfd8005c | 167 | dst = kmap_atomic(page); |
9db5579b | 168 | memcpy(dst, src, copy); |
cfd8005c | 169 | kunmap_atomic(dst); |
9db5579b NP |
170 | } |
171 | } | |
172 | ||
173 | /* | |
174 | * Copy n bytes to dst from the brd starting at sector. Does not sleep. | |
175 | */ | |
176 | static void copy_from_brd(void *dst, struct brd_device *brd, | |
177 | sector_t sector, size_t n) | |
178 | { | |
179 | struct page *page; | |
180 | void *src; | |
181 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | |
182 | size_t copy; | |
183 | ||
184 | copy = min_t(size_t, n, PAGE_SIZE - offset); | |
185 | page = brd_lookup_page(brd, sector); | |
186 | if (page) { | |
cfd8005c | 187 | src = kmap_atomic(page); |
9db5579b | 188 | memcpy(dst, src + offset, copy); |
cfd8005c | 189 | kunmap_atomic(src); |
9db5579b NP |
190 | } else |
191 | memset(dst, 0, copy); | |
192 | ||
193 | if (copy < n) { | |
194 | dst += copy; | |
195 | sector += copy >> SECTOR_SHIFT; | |
196 | copy = n - copy; | |
197 | page = brd_lookup_page(brd, sector); | |
198 | if (page) { | |
cfd8005c | 199 | src = kmap_atomic(page); |
9db5579b | 200 | memcpy(dst, src, copy); |
cfd8005c | 201 | kunmap_atomic(src); |
9db5579b NP |
202 | } else |
203 | memset(dst, 0, copy); | |
204 | } | |
205 | } | |
206 | ||
207 | /* | |
208 | * Process a single bvec of a bio. | |
209 | */ | |
210 | static int brd_do_bvec(struct brd_device *brd, struct page *page, | |
6ded703c | 211 | unsigned int len, unsigned int off, blk_opf_t opf, |
9db5579b NP |
212 | sector_t sector) |
213 | { | |
214 | void *mem; | |
215 | int err = 0; | |
216 | ||
6ded703c JA |
217 | if (op_is_write(opf)) { |
218 | /* | |
219 | * Must use NOIO because we don't want to recurse back into the | |
220 | * block or filesystem layers from page reclaim. | |
221 | */ | |
222 | gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO; | |
223 | ||
224 | err = copy_to_brd_setup(brd, sector, len, gfp); | |
9db5579b NP |
225 | if (err) |
226 | goto out; | |
227 | } | |
228 | ||
cfd8005c | 229 | mem = kmap_atomic(page); |
6ded703c | 230 | if (!op_is_write(opf)) { |
9db5579b NP |
231 | copy_from_brd(mem + off, brd, sector, len); |
232 | flush_dcache_page(page); | |
c2572f2b NP |
233 | } else { |
234 | flush_dcache_page(page); | |
9db5579b | 235 | copy_to_brd(brd, mem + off, sector, len); |
c2572f2b | 236 | } |
cfd8005c | 237 | kunmap_atomic(mem); |
9db5579b NP |
238 | |
239 | out: | |
240 | return err; | |
241 | } | |
242 | ||
3e08773c | 243 | static void brd_submit_bio(struct bio *bio) |
9db5579b | 244 | { |
309dca30 | 245 | struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; |
74cb8994 | 246 | sector_t sector = bio->bi_iter.bi_sector; |
7988613b | 247 | struct bio_vec bvec; |
7988613b | 248 | struct bvec_iter iter; |
9db5579b | 249 | |
7988613b KO |
250 | bio_for_each_segment(bvec, bio, iter) { |
251 | unsigned int len = bvec.bv_len; | |
4246a0b6 CH |
252 | int err; |
253 | ||
f1acbf21 ML |
254 | /* Don't support un-aligned buffer */ |
255 | WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) || | |
256 | (len & (SECTOR_SIZE - 1))); | |
257 | ||
c11f0c0b | 258 | err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, |
6ded703c | 259 | bio->bi_opf, sector); |
3e08773c | 260 | if (err) { |
6ded703c JA |
261 | if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { |
262 | bio_wouldblock_error(bio); | |
263 | return; | |
264 | } | |
3e08773c CH |
265 | bio_io_error(bio); |
266 | return; | |
267 | } | |
9db5579b NP |
268 | sector += len >> SECTOR_SHIFT; |
269 | } | |
270 | ||
4246a0b6 | 271 | bio_endio(bio); |
9db5579b NP |
272 | } |
273 | ||
83d5cde4 | 274 | static const struct block_device_operations brd_fops = { |
75acb9cd | 275 | .owner = THIS_MODULE, |
c62b37d9 | 276 | .submit_bio = brd_submit_bio, |
9db5579b NP |
277 | }; |
278 | ||
279 | /* | |
280 | * And now the modules code and kernel interface. | |
281 | */ | |
937af5ec | 282 | static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; |
5657a819 | 283 | module_param(rd_nr, int, 0444); |
9db5579b | 284 | MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); |
937af5ec | 285 | |
366f4aea | 286 | unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; |
5657a819 | 287 | module_param(rd_size, ulong, 0444); |
9db5579b | 288 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); |
937af5ec BH |
289 | |
290 | static int max_part = 1; | |
5657a819 | 291 | module_param(max_part, int, 0444); |
937af5ec BH |
292 | MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); |
293 | ||
9db5579b NP |
294 | MODULE_LICENSE("GPL"); |
295 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
efedf51c | 296 | MODULE_ALIAS("rd"); |
9db5579b NP |
297 | |
298 | #ifndef MODULE | |
299 | /* Legacy boot options - nonmodular */ | |
300 | static int __init ramdisk_size(char *str) | |
301 | { | |
302 | rd_size = simple_strtol(str, NULL, 0); | |
303 | return 1; | |
304 | } | |
1adbee50 | 305 | __setup("ramdisk_size=", ramdisk_size); |
9db5579b NP |
306 | #endif |
307 | ||
/*
 * The device scheme is derived from loop.c.  Keep the two in sync where
 * possible (they should share code eventually).
 */

/* All allocated RAM disks, linked through brd_device.brd_list. */
static LIST_HEAD(brd_devices);
/* debugfs directory holding one page-count file per device. */
static struct dentry *brd_debugfs_dir;
9db5579b | 314 | |
7f9b348c | 315 | static int brd_alloc(int i) |
9db5579b NP |
316 | { |
317 | struct brd_device *brd; | |
318 | struct gendisk *disk; | |
f4be591f | 319 | char buf[DISK_NAME_LEN]; |
e1528830 | 320 | int err = -ENOMEM; |
b5baaba4 CH |
321 | struct queue_limits lim = { |
322 | /* | |
323 | * This is so fdisk will align partitions on 4k, because of | |
324 | * direct_access API needing 4k alignment, returning a PFN | |
325 | * (This is only a problem on very small devices <= 4M, | |
326 | * otherwise fdisk will align on 1M. Regardless this call | |
327 | * is harmless) | |
328 | */ | |
329 | .physical_block_size = PAGE_SIZE, | |
330 | }; | |
9db5579b | 331 | |
00358933 TH |
332 | list_for_each_entry(brd, &brd_devices, brd_list) |
333 | if (brd->brd_number == i) | |
f7bf3586 | 334 | return -EEXIST; |
9db5579b | 335 | brd = kzalloc(sizeof(*brd), GFP_KERNEL); |
00358933 | 336 | if (!brd) |
7f9b348c | 337 | return -ENOMEM; |
9db5579b | 338 | brd->brd_number = i; |
f7bf3586 | 339 | list_add_tail(&brd->brd_list, &brd_devices); |
f7bf3586 | 340 | |
786bb024 | 341 | xa_init(&brd->brd_pages); |
9db5579b | 342 | |
f4be591f CO |
343 | snprintf(buf, DISK_NAME_LEN, "ram%d", i); |
344 | if (!IS_ERR_OR_NULL(brd_debugfs_dir)) | |
345 | debugfs_create_u64(buf, 0444, brd_debugfs_dir, | |
346 | &brd->brd_nr_pages); | |
347 | ||
b5baaba4 | 348 | disk = brd->brd_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); |
74fa8f9c CH |
349 | if (IS_ERR(disk)) { |
350 | err = PTR_ERR(disk); | |
7f9b348c | 351 | goto out_free_dev; |
74fa8f9c | 352 | } |
9db5579b | 353 | disk->major = RAMDISK_MAJOR; |
937af5ec | 354 | disk->first_minor = i * max_part; |
7f9b348c | 355 | disk->minors = max_part; |
9db5579b NP |
356 | disk->fops = &brd_fops; |
357 | disk->private_data = brd; | |
e55e1b48 | 358 | strscpy(disk->disk_name, buf, DISK_NAME_LEN); |
9db5579b | 359 | set_capacity(disk, rd_size * 2); |
7f9b348c | 360 | |
316ba573 | 361 | /* Tell the block layer that this is not a rotational device */ |
7f9b348c | 362 | blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); |
3222d8c2 | 363 | blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); |
67205f80 | 364 | blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); |
e1528830 LC |
365 | err = add_disk(disk); |
366 | if (err) | |
367 | goto out_cleanup_disk; | |
316ba573 | 368 | |
7f9b348c | 369 | return 0; |
9db5579b | 370 | |
e1528830 | 371 | out_cleanup_disk: |
8b9ab626 | 372 | put_disk(disk); |
9db5579b | 373 | out_free_dev: |
f7bf3586 | 374 | list_del(&brd->brd_list); |
9db5579b | 375 | kfree(brd); |
e1528830 | 376 | return err; |
9db5579b NP |
377 | } |
378 | ||
7cc178a6 | 379 | static void brd_probe(dev_t dev) |
9db5579b | 380 | { |
f7bf3586 | 381 | brd_alloc(MINOR(dev) / max_part); |
9db5579b NP |
382 | } |
383 | ||
00358933 | 384 | static void brd_cleanup(void) |
9db5579b | 385 | { |
00358933 TH |
386 | struct brd_device *brd, *next; |
387 | ||
388 | debugfs_remove_recursive(brd_debugfs_dir); | |
389 | ||
390 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { | |
391 | del_gendisk(brd->brd_disk); | |
8b9ab626 | 392 | put_disk(brd->brd_disk); |
00358933 TH |
393 | brd_free_pages(brd); |
394 | list_del(&brd->brd_list); | |
395 | kfree(brd); | |
396 | } | |
9db5579b NP |
397 | } |
398 | ||
c8ab4225 ZL |
399 | static inline void brd_check_and_reset_par(void) |
400 | { | |
401 | if (unlikely(!max_part)) | |
402 | max_part = 1; | |
403 | ||
404 | /* | |
405 | * make sure 'max_part' can be divided exactly by (1U << MINORBITS), | |
406 | * otherwise, it is possiable to get same dev_t when adding partitions. | |
407 | */ | |
408 | if ((1U << MINORBITS) % max_part != 0) | |
409 | max_part = 1UL << fls(max_part); | |
410 | ||
411 | if (max_part > DISK_MAX_PARTS) { | |
412 | pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", | |
413 | DISK_MAX_PARTS, DISK_MAX_PARTS); | |
414 | max_part = DISK_MAX_PARTS; | |
415 | } | |
416 | } | |
417 | ||
9db5579b NP |
418 | static int __init brd_init(void) |
419 | { | |
7f9b348c | 420 | int err, i; |
9db5579b | 421 | |
00358933 TH |
422 | brd_check_and_reset_par(); |
423 | ||
424 | brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL); | |
425 | ||
426 | for (i = 0; i < rd_nr; i++) { | |
427 | err = brd_alloc(i); | |
428 | if (err) | |
429 | goto out_free; | |
430 | } | |
431 | ||
9db5579b NP |
432 | /* |
433 | * brd module now has a feature to instantiate underlying device | |
434 | * structure on-demand, provided that there is an access dev node. | |
9db5579b | 435 | * |
937af5ec BH |
436 | * (1) if rd_nr is specified, create that many upfront. else |
437 | * it defaults to CONFIG_BLK_DEV_RAM_COUNT | |
438 | * (2) User can further extend brd devices by create dev node themselves | |
439 | * and have kernel automatically instantiate actual device | |
440 | * on-demand. Example: | |
441 | * mknod /path/devnod_name b 1 X # 1 is the rd major | |
442 | * fdisk -l /path/devnod_name | |
443 | * If (X / max_part) was not already created it will be created | |
444 | * dynamically. | |
9db5579b | 445 | */ |
d7853d1f | 446 | |
00358933 TH |
447 | if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) { |
448 | err = -EIO; | |
449 | goto out_free; | |
9db5579b NP |
450 | } |
451 | ||
937af5ec | 452 | pr_info("brd: module loaded\n"); |
9db5579b NP |
453 | return 0; |
454 | ||
455 | out_free: | |
00358933 | 456 | brd_cleanup(); |
9db5579b | 457 | |
937af5ec | 458 | pr_info("brd: module NOT loaded !!!\n"); |
7f9b348c | 459 | return err; |
9db5579b NP |
460 | } |
461 | ||
462 | static void __exit brd_exit(void) | |
463 | { | |
9db5579b | 464 | |
f7bf3586 | 465 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
00358933 | 466 | brd_cleanup(); |
9db5579b | 467 | |
937af5ec | 468 | pr_info("brd: module unloaded\n"); |
9db5579b NP |
469 | } |
470 | ||
/* Module entry and exit points. */
module_init(brd_init);
module_exit(brd_exit);
473 |