Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
9db5579b NP |
2 | /* |
3 | * Ram backed block device driver. | |
4 | * | |
5 | * Copyright (C) 2007 Nick Piggin | |
6 | * Copyright (C) 2007 Novell Inc. | |
7 | * | |
8 | * Parts derived from drivers/block/rd.c, and drivers/block/loop.c, copyright | |
9 | * of their respective owners. | |
10 | */ | |
11 | ||
12 | #include <linux/init.h> | |
287f3ca5 | 13 | #include <linux/initrd.h> |
9db5579b NP |
14 | #include <linux/module.h> |
15 | #include <linux/moduleparam.h> | |
16 | #include <linux/major.h> | |
17 | #include <linux/blkdev.h> | |
18 | #include <linux/bio.h> | |
19 | #include <linux/highmem.h> | |
2a48fc0a | 20 | #include <linux/mutex.h> |
4ee60ec1 | 21 | #include <linux/pagemap.h> |
786bb024 | 22 | #include <linux/xarray.h> |
ff01bb48 | 23 | #include <linux/fs.h> |
5a0e3ad6 | 24 | #include <linux/slab.h> |
23c47d2a | 25 | #include <linux/backing-dev.h> |
f4be591f | 26 | #include <linux/debugfs.h> |
9db5579b | 27 | |
7c0f6ba6 | 28 | #include <linux/uaccess.h> |
9db5579b | 29 | |
9db5579b | 30 | /* |
786bb024 | 31 | * Each block ramdisk device has a xarray brd_pages of pages that stores |
9db5579b NP |
32 | * the pages containing the block device's contents. A brd page's ->index is |
33 | * its offset in PAGE_SIZE units. This is similar to, but in no way connected | |
34 | * with, the kernel's pagecache or buffer cache (which sit above our block | |
35 | * device). | |
36 | */ | |
37 | struct brd_device { | |
7f9b348c | 38 | int brd_number; |
9db5579b NP |
39 | struct gendisk *brd_disk; |
40 | struct list_head brd_list; | |
41 | ||
42 | /* | |
786bb024 | 43 | * Backing store of pages. This is the contents of the block device. |
9db5579b | 44 | */ |
786bb024 | 45 | struct xarray brd_pages; |
f4be591f | 46 | u64 brd_nr_pages; |
9db5579b NP |
47 | }; |
48 | ||
49 | /* | |
50 | * Look up and return a brd's page for a given sector. | |
51 | */ | |
52 | static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) | |
53 | { | |
54 | pgoff_t idx; | |
55 | struct page *page; | |
56 | ||
9db5579b | 57 | idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */ |
786bb024 | 58 | page = xa_load(&brd->brd_pages, idx); |
9db5579b NP |
59 | |
60 | BUG_ON(page && page->index != idx); | |
61 | ||
62 | return page; | |
63 | } | |
64 | ||
65 | /* | |
db0ccc44 | 66 | * Insert a new page for a given sector, if one does not already exist. |
9db5579b | 67 | */ |
6ded703c | 68 | static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) |
9db5579b NP |
69 | { |
70 | pgoff_t idx; | |
786bb024 | 71 | struct page *page, *cur; |
6ded703c | 72 | int ret = 0; |
9db5579b NP |
73 | |
74 | page = brd_lookup_page(brd, sector); | |
75 | if (page) | |
db0ccc44 | 76 | return 0; |
9db5579b | 77 | |
6ded703c | 78 | page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); |
9db5579b | 79 | if (!page) |
db0ccc44 | 80 | return -ENOMEM; |
9db5579b | 81 | |
786bb024 | 82 | xa_lock(&brd->brd_pages); |
9db5579b | 83 | |
9db5579b | 84 | idx = sector >> PAGE_SECTORS_SHIFT; |
dfd20b2b | 85 | page->index = idx; |
786bb024 PR |
86 | |
87 | cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp); | |
88 | ||
89 | if (unlikely(cur)) { | |
9db5579b | 90 | __free_page(page); |
786bb024 PR |
91 | ret = xa_err(cur); |
92 | if (!ret && (cur->index != idx)) | |
6ded703c | 93 | ret = -EIO; |
f4be591f CO |
94 | } else { |
95 | brd->brd_nr_pages++; | |
dfd20b2b | 96 | } |
9db5579b | 97 | |
786bb024 PR |
98 | xa_unlock(&brd->brd_pages); |
99 | ||
6ded703c | 100 | return ret; |
9db5579b NP |
101 | } |
102 | ||
103 | /* | |
786bb024 | 104 | * Free all backing store pages and xarray. This must only be called when |
9db5579b NP |
105 | * there are no other users of the device. |
106 | */ | |
9db5579b NP |
107 | static void brd_free_pages(struct brd_device *brd) |
108 | { | |
786bb024 PR |
109 | struct page *page; |
110 | pgoff_t idx; | |
9db5579b | 111 | |
786bb024 PR |
112 | xa_for_each(&brd->brd_pages, idx, page) { |
113 | __free_page(page); | |
114 | cond_resched_rcu(); | |
115 | } | |
936b33f7 | 116 | |
786bb024 | 117 | xa_destroy(&brd->brd_pages); |
9db5579b NP |
118 | } |
119 | ||
120 | /* | |
121 | * copy_to_brd_setup must be called before copy_to_brd. It may sleep. | |
122 | */ | |
6ded703c JA |
123 | static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, |
124 | gfp_t gfp) | |
9db5579b NP |
125 | { |
126 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | |
127 | size_t copy; | |
db0ccc44 | 128 | int ret; |
9db5579b NP |
129 | |
130 | copy = min_t(size_t, n, PAGE_SIZE - offset); | |
6ded703c | 131 | ret = brd_insert_page(brd, sector, gfp); |
db0ccc44 JA |
132 | if (ret) |
133 | return ret; | |
9db5579b NP |
134 | if (copy < n) { |
135 | sector += copy >> SECTOR_SHIFT; | |
6ded703c | 136 | ret = brd_insert_page(brd, sector, gfp); |
9db5579b | 137 | } |
db0ccc44 | 138 | return ret; |
9db5579b NP |
139 | } |
140 | ||
141 | /* | |
142 | * Copy n bytes from src to the brd starting at sector. Does not sleep. | |
143 | */ | |
144 | static void copy_to_brd(struct brd_device *brd, const void *src, | |
145 | sector_t sector, size_t n) | |
146 | { | |
147 | struct page *page; | |
148 | void *dst; | |
149 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | |
150 | size_t copy; | |
151 | ||
152 | copy = min_t(size_t, n, PAGE_SIZE - offset); | |
153 | page = brd_lookup_page(brd, sector); | |
154 | BUG_ON(!page); | |
155 | ||
cfd8005c | 156 | dst = kmap_atomic(page); |
9db5579b | 157 | memcpy(dst + offset, src, copy); |
cfd8005c | 158 | kunmap_atomic(dst); |
9db5579b NP |
159 | |
160 | if (copy < n) { | |
161 | src += copy; | |
162 | sector += copy >> SECTOR_SHIFT; | |
163 | copy = n - copy; | |
164 | page = brd_lookup_page(brd, sector); | |
165 | BUG_ON(!page); | |
166 | ||
cfd8005c | 167 | dst = kmap_atomic(page); |
9db5579b | 168 | memcpy(dst, src, copy); |
cfd8005c | 169 | kunmap_atomic(dst); |
9db5579b NP |
170 | } |
171 | } | |
172 | ||
173 | /* | |
174 | * Copy n bytes to dst from the brd starting at sector. Does not sleep. | |
175 | */ | |
176 | static void copy_from_brd(void *dst, struct brd_device *brd, | |
177 | sector_t sector, size_t n) | |
178 | { | |
179 | struct page *page; | |
180 | void *src; | |
181 | unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; | |
182 | size_t copy; | |
183 | ||
184 | copy = min_t(size_t, n, PAGE_SIZE - offset); | |
185 | page = brd_lookup_page(brd, sector); | |
186 | if (page) { | |
cfd8005c | 187 | src = kmap_atomic(page); |
9db5579b | 188 | memcpy(dst, src + offset, copy); |
cfd8005c | 189 | kunmap_atomic(src); |
9db5579b NP |
190 | } else |
191 | memset(dst, 0, copy); | |
192 | ||
193 | if (copy < n) { | |
194 | dst += copy; | |
195 | sector += copy >> SECTOR_SHIFT; | |
196 | copy = n - copy; | |
197 | page = brd_lookup_page(brd, sector); | |
198 | if (page) { | |
cfd8005c | 199 | src = kmap_atomic(page); |
9db5579b | 200 | memcpy(dst, src, copy); |
cfd8005c | 201 | kunmap_atomic(src); |
9db5579b NP |
202 | } else |
203 | memset(dst, 0, copy); | |
204 | } | |
205 | } | |
206 | ||
207 | /* | |
208 | * Process a single bvec of a bio. | |
209 | */ | |
210 | static int brd_do_bvec(struct brd_device *brd, struct page *page, | |
6ded703c | 211 | unsigned int len, unsigned int off, blk_opf_t opf, |
9db5579b NP |
212 | sector_t sector) |
213 | { | |
214 | void *mem; | |
215 | int err = 0; | |
216 | ||
6ded703c JA |
217 | if (op_is_write(opf)) { |
218 | /* | |
219 | * Must use NOIO because we don't want to recurse back into the | |
220 | * block or filesystem layers from page reclaim. | |
221 | */ | |
222 | gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO; | |
223 | ||
224 | err = copy_to_brd_setup(brd, sector, len, gfp); | |
9db5579b NP |
225 | if (err) |
226 | goto out; | |
227 | } | |
228 | ||
cfd8005c | 229 | mem = kmap_atomic(page); |
6ded703c | 230 | if (!op_is_write(opf)) { |
9db5579b NP |
231 | copy_from_brd(mem + off, brd, sector, len); |
232 | flush_dcache_page(page); | |
c2572f2b NP |
233 | } else { |
234 | flush_dcache_page(page); | |
9db5579b | 235 | copy_to_brd(brd, mem + off, sector, len); |
c2572f2b | 236 | } |
cfd8005c | 237 | kunmap_atomic(mem); |
9db5579b NP |
238 | |
239 | out: | |
240 | return err; | |
241 | } | |
242 | ||
3e08773c | 243 | static void brd_submit_bio(struct bio *bio) |
9db5579b | 244 | { |
309dca30 | 245 | struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; |
74cb8994 | 246 | sector_t sector = bio->bi_iter.bi_sector; |
7988613b | 247 | struct bio_vec bvec; |
7988613b | 248 | struct bvec_iter iter; |
9db5579b | 249 | |
7988613b KO |
250 | bio_for_each_segment(bvec, bio, iter) { |
251 | unsigned int len = bvec.bv_len; | |
4246a0b6 CH |
252 | int err; |
253 | ||
f1acbf21 ML |
254 | /* Don't support un-aligned buffer */ |
255 | WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) || | |
256 | (len & (SECTOR_SIZE - 1))); | |
257 | ||
c11f0c0b | 258 | err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, |
6ded703c | 259 | bio->bi_opf, sector); |
3e08773c | 260 | if (err) { |
6ded703c JA |
261 | if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { |
262 | bio_wouldblock_error(bio); | |
263 | return; | |
264 | } | |
3e08773c CH |
265 | bio_io_error(bio); |
266 | return; | |
267 | } | |
9db5579b NP |
268 | sector += len >> SECTOR_SHIFT; |
269 | } | |
270 | ||
4246a0b6 | 271 | bio_endio(bio); |
9db5579b NP |
272 | } |
273 | ||
83d5cde4 | 274 | static const struct block_device_operations brd_fops = { |
75acb9cd | 275 | .owner = THIS_MODULE, |
c62b37d9 | 276 | .submit_bio = brd_submit_bio, |
9db5579b NP |
277 | }; |
278 | ||
279 | /* | |
280 | * And now the modules code and kernel interface. | |
281 | */ | |
937af5ec | 282 | static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; |
5657a819 | 283 | module_param(rd_nr, int, 0444); |
9db5579b | 284 | MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); |
937af5ec | 285 | |
366f4aea | 286 | unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; |
5657a819 | 287 | module_param(rd_size, ulong, 0444); |
9db5579b | 288 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); |
937af5ec BH |
289 | |
290 | static int max_part = 1; | |
5657a819 | 291 | module_param(max_part, int, 0444); |
937af5ec BH |
292 | MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices"); |
293 | ||
9db5579b NP |
294 | MODULE_LICENSE("GPL"); |
295 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
efedf51c | 296 | MODULE_ALIAS("rd"); |
9db5579b NP |
297 | |
298 | #ifndef MODULE | |
299 | /* Legacy boot options - nonmodular */ | |
300 | static int __init ramdisk_size(char *str) | |
301 | { | |
302 | rd_size = simple_strtol(str, NULL, 0); | |
303 | return 1; | |
304 | } | |
1adbee50 | 305 | __setup("ramdisk_size=", ramdisk_size); |
9db5579b NP |
306 | #endif |
307 | ||
/*
 * The device scheme is derived from loop.c.  Keep them in sync where
 * possible (should share code eventually).
 */

/* Every allocated brd_device, linked through brd_device::brd_list. */
static LIST_HEAD(brd_devices);
/*
 * The "ramdisk_pages" debugfs directory created by brd_init().  Users check
 * it with IS_ERR_OR_NULL() before creating files in it.
 */
static struct dentry *brd_debugfs_dir;
9db5579b | 314 | |
7f9b348c | 315 | static int brd_alloc(int i) |
9db5579b NP |
316 | { |
317 | struct brd_device *brd; | |
318 | struct gendisk *disk; | |
f4be591f | 319 | char buf[DISK_NAME_LEN]; |
e1528830 | 320 | int err = -ENOMEM; |
9db5579b | 321 | |
00358933 TH |
322 | list_for_each_entry(brd, &brd_devices, brd_list) |
323 | if (brd->brd_number == i) | |
f7bf3586 | 324 | return -EEXIST; |
9db5579b | 325 | brd = kzalloc(sizeof(*brd), GFP_KERNEL); |
00358933 | 326 | if (!brd) |
7f9b348c | 327 | return -ENOMEM; |
9db5579b | 328 | brd->brd_number = i; |
f7bf3586 | 329 | list_add_tail(&brd->brd_list, &brd_devices); |
f7bf3586 | 330 | |
786bb024 | 331 | xa_init(&brd->brd_pages); |
9db5579b | 332 | |
f4be591f CO |
333 | snprintf(buf, DISK_NAME_LEN, "ram%d", i); |
334 | if (!IS_ERR_OR_NULL(brd_debugfs_dir)) | |
335 | debugfs_create_u64(buf, 0444, brd_debugfs_dir, | |
336 | &brd->brd_nr_pages); | |
337 | ||
7f9b348c | 338 | disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE); |
9db5579b | 339 | if (!disk) |
7f9b348c CH |
340 | goto out_free_dev; |
341 | ||
9db5579b | 342 | disk->major = RAMDISK_MAJOR; |
937af5ec | 343 | disk->first_minor = i * max_part; |
7f9b348c | 344 | disk->minors = max_part; |
9db5579b NP |
345 | disk->fops = &brd_fops; |
346 | disk->private_data = brd; | |
e55e1b48 | 347 | strscpy(disk->disk_name, buf, DISK_NAME_LEN); |
9db5579b | 348 | set_capacity(disk, rd_size * 2); |
7f9b348c CH |
349 | |
350 | /* | |
351 | * This is so fdisk will align partitions on 4k, because of | |
352 | * direct_access API needing 4k alignment, returning a PFN | |
353 | * (This is only a problem on very small devices <= 4M, | |
354 | * otherwise fdisk will align on 1M. Regardless this call | |
355 | * is harmless) | |
356 | */ | |
357 | blk_queue_physical_block_size(disk->queue, PAGE_SIZE); | |
9db5579b | 358 | |
316ba573 | 359 | /* Tell the block layer that this is not a rotational device */ |
7f9b348c | 360 | blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); |
3222d8c2 | 361 | blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue); |
67205f80 | 362 | blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); |
e1528830 LC |
363 | err = add_disk(disk); |
364 | if (err) | |
365 | goto out_cleanup_disk; | |
316ba573 | 366 | |
7f9b348c | 367 | return 0; |
9db5579b | 368 | |
e1528830 | 369 | out_cleanup_disk: |
8b9ab626 | 370 | put_disk(disk); |
9db5579b | 371 | out_free_dev: |
f7bf3586 | 372 | list_del(&brd->brd_list); |
9db5579b | 373 | kfree(brd); |
e1528830 | 374 | return err; |
9db5579b NP |
375 | } |
376 | ||
7cc178a6 | 377 | static void brd_probe(dev_t dev) |
9db5579b | 378 | { |
f7bf3586 | 379 | brd_alloc(MINOR(dev) / max_part); |
9db5579b NP |
380 | } |
381 | ||
00358933 | 382 | static void brd_cleanup(void) |
9db5579b | 383 | { |
00358933 TH |
384 | struct brd_device *brd, *next; |
385 | ||
386 | debugfs_remove_recursive(brd_debugfs_dir); | |
387 | ||
388 | list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { | |
389 | del_gendisk(brd->brd_disk); | |
8b9ab626 | 390 | put_disk(brd->brd_disk); |
00358933 TH |
391 | brd_free_pages(brd); |
392 | list_del(&brd->brd_list); | |
393 | kfree(brd); | |
394 | } | |
9db5579b NP |
395 | } |
396 | ||
c8ab4225 ZL |
397 | static inline void brd_check_and_reset_par(void) |
398 | { | |
399 | if (unlikely(!max_part)) | |
400 | max_part = 1; | |
401 | ||
402 | /* | |
403 | * make sure 'max_part' can be divided exactly by (1U << MINORBITS), | |
404 | * otherwise, it is possiable to get same dev_t when adding partitions. | |
405 | */ | |
406 | if ((1U << MINORBITS) % max_part != 0) | |
407 | max_part = 1UL << fls(max_part); | |
408 | ||
409 | if (max_part > DISK_MAX_PARTS) { | |
410 | pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", | |
411 | DISK_MAX_PARTS, DISK_MAX_PARTS); | |
412 | max_part = DISK_MAX_PARTS; | |
413 | } | |
414 | } | |
415 | ||
9db5579b NP |
416 | static int __init brd_init(void) |
417 | { | |
7f9b348c | 418 | int err, i; |
9db5579b | 419 | |
00358933 TH |
420 | brd_check_and_reset_par(); |
421 | ||
422 | brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL); | |
423 | ||
424 | for (i = 0; i < rd_nr; i++) { | |
425 | err = brd_alloc(i); | |
426 | if (err) | |
427 | goto out_free; | |
428 | } | |
429 | ||
9db5579b NP |
430 | /* |
431 | * brd module now has a feature to instantiate underlying device | |
432 | * structure on-demand, provided that there is an access dev node. | |
9db5579b | 433 | * |
937af5ec BH |
434 | * (1) if rd_nr is specified, create that many upfront. else |
435 | * it defaults to CONFIG_BLK_DEV_RAM_COUNT | |
436 | * (2) User can further extend brd devices by create dev node themselves | |
437 | * and have kernel automatically instantiate actual device | |
438 | * on-demand. Example: | |
439 | * mknod /path/devnod_name b 1 X # 1 is the rd major | |
440 | * fdisk -l /path/devnod_name | |
441 | * If (X / max_part) was not already created it will be created | |
442 | * dynamically. | |
9db5579b | 443 | */ |
d7853d1f | 444 | |
00358933 TH |
445 | if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) { |
446 | err = -EIO; | |
447 | goto out_free; | |
9db5579b NP |
448 | } |
449 | ||
937af5ec | 450 | pr_info("brd: module loaded\n"); |
9db5579b NP |
451 | return 0; |
452 | ||
453 | out_free: | |
00358933 | 454 | brd_cleanup(); |
9db5579b | 455 | |
937af5ec | 456 | pr_info("brd: module NOT loaded !!!\n"); |
7f9b348c | 457 | return err; |
9db5579b NP |
458 | } |
459 | ||
460 | static void __exit brd_exit(void) | |
461 | { | |
9db5579b | 462 | |
f7bf3586 | 463 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
00358933 | 464 | brd_cleanup(); |
9db5579b | 465 | |
937af5ec | 466 | pr_info("brd: module unloaded\n"); |
9db5579b NP |
467 | } |
468 | ||
469 | module_init(brd_init); | |
470 | module_exit(brd_exit); | |
471 |