Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta. | |
3 | * | |
4 | * (C) Chad Page, Theodore Ts'o, et al., 1995. | |
5 | * | |
6 | * This RAM disk is designed to have filesystems created on it and mounted | |
7 | * just like a regular floppy disk. | |
8 | * | |
9 | * It also does something suggested by Linus: use the buffer cache as the | |
10 | * RAM disk data. This makes it possible to dynamically allocate the RAM disk | |
11 | * buffer - with some consequences I have to deal with as I write this. | |
12 | * | |
13 | * This code is based on the original ramdisk.c, written mostly by | |
14 | * Theodore Ts'o (TYT) in 1991. The code was largely rewritten by | |
15 | * Chad Page to use the buffer cache to store the RAM disk data in | |
16 | * 1995; Theodore then took over the driver again, and cleaned it up | |
17 | * for inclusion in the mainline kernel. | |
18 | * | |
19 | * The original CRAMDISK code was written by Richard Lyons, and | |
20 | * adapted by Chad Page to use the new RAM disk interface. Theodore | |
21 | * Ts'o rewrote it so that both the compressed RAM disk loader and the | |
22 | * kernel decompressor use the same inflate.c codebase. The RAM disk | |
23 | * loader now also loads into a dynamic (buffer cache based) RAM disk, | |
24 | * not the old static RAM disk. Support for the old static RAM disk has | |
25 | * been completely removed. | |
26 | * | |
27 | * Loadable module support added by Tom Dyas. | |
28 | * | |
29 | * Further cleanups by Chad Page (page0588@sundance.sjsu.edu): | |
30 | * Cosmetic changes in #ifdef MODULE, code movement, etc. | |
31 | * When the RAM disk module is removed, free the protected buffers | |
32 | * Default RAM disk size changed to 2.88 MB | |
33 | * | |
34 | * Added initrd: Werner Almesberger & Hans Lermen, Feb '96 | |
35 | * | |
36 | * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB) | |
37 | * - Chad Page | |
38 | * | |
39 | * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98 | |
40 | * | |
41 | * Make block size and block size shift for RAM disks a global macro | |
42 | * and set blk_size for -ENOSPC, Werner Fink <werner@suse.de>, Apr '99 | |
43 | */ | |
44 | ||
1da177e4 LT |
45 | #include <linux/string.h> |
46 | #include <linux/slab.h> | |
47 | #include <asm/atomic.h> | |
48 | #include <linux/bio.h> | |
49 | #include <linux/module.h> | |
50 | #include <linux/moduleparam.h> | |
51 | #include <linux/init.h> | |
1da177e4 LT |
52 | #include <linux/pagemap.h> |
53 | #include <linux/blkdev.h> | |
54 | #include <linux/genhd.h> | |
55 | #include <linux/buffer_head.h> /* for invalidate_bdev() */ | |
56 | #include <linux/backing-dev.h> | |
57 | #include <linux/blkpg.h> | |
58 | #include <linux/writeback.h> | |
59 | ||
60 | #include <asm/uaccess.h> | |
61 | ||
62 | /* Various static variables go here. Most are used only in the RAM disk code. | |
63 | */ | |
64 | ||
65 | static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT]; | |
66 | static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data */ | |
67 | static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT]; | |
68 | ||
69 | /* | |
70 | * Parameters for the boot-loading of the RAM disk. These are set by | |
71 | * init/main.c (from arguments to the kernel command line) or from the | |
72 | * architecture-specific setup routine (from the stored boot sector | |
73 | * information). | |
74 | */ | |
cccf2508 | 75 | int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ |
1da177e4 LT |
76 | /* |
77 | * It would be very desirable to have a soft-blocksize (that in the case | |
78 | * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because | |
79 | * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of | |
80 | * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages | |
81 | * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only | |
82 | * 1 page will be protected. Depending on the size of the ramdisk you | |
83 | * may want to change the ramdisk blocksize to achieve a better or worse MM | |
84 | * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that | |
85 | * supposes the filesystem in the image uses a BLOCK_SIZE blocksize). | |
86 | */ | |
bef317e3 | 87 | static int rd_blocksize = CONFIG_BLK_DEV_RAM_BLOCKSIZE; |
1da177e4 LT |
88 | |
89 | /* | |
90 | * Copyright (C) 2000 Linus Torvalds. | |
91 | * 2000 Transmeta Corp. | |
92 | * aops copied from ramfs. | |
93 | */ | |
94 | ||
95 | /* | |
96 | * If a ramdisk page has buffers, some may be uptodate and some may be not. | |
97 | * To bring the page uptodate we zero out the non-uptodate buffers. The | |
98 | * page must be locked. | |
99 | */ | |
100 | static void make_page_uptodate(struct page *page) | |
101 | { | |
102 | if (page_has_buffers(page)) { | |
103 | struct buffer_head *bh = page_buffers(page); | |
104 | struct buffer_head *head = bh; | |
105 | ||
106 | do { | |
107 | if (!buffer_uptodate(bh)) { | |
108 | memset(bh->b_data, 0, bh->b_size); | |
109 | /* | |
110 | * akpm: I'm totally undecided about this. The | |
111 | * buffer has just been magically brought "up to | |
112 | * date", but nobody should want to be reading | |
113 | * it anyway, because it hasn't been used for | |
114 | * anything yet. It is still in a "not read | |
115 | * from disk yet" state. | |
116 | * | |
117 | * But non-uptodate buffers against an uptodate | |
118 | * page are against the rules. So do it anyway. | |
119 | */ | |
120 | set_buffer_uptodate(bh); | |
121 | } | |
122 | } while ((bh = bh->b_this_page) != head); | |
123 | } else { | |
124 | memset(page_address(page), 0, PAGE_CACHE_SIZE); | |
125 | } | |
126 | flush_dcache_page(page); | |
127 | SetPageUptodate(page); | |
128 | } | |
129 | ||
/*
 * ->readpage: there is no backing store to read from, so a page that is not
 * yet uptodate is simply zero-filled.  The page is unlocked before returning.
 * Always returns 0.
 */
static int ramdisk_readpage(struct file *file, struct page *page)
{
	if (PageUptodate(page))
		goto unlock;

	make_page_uptodate(page);
unlock:
	unlock_page(page);
	return 0;
}
137 | ||
/*
 * ->prepare_write: make sure the page is uptodate (zero-filling it if
 * necessary) before the caller writes the range [offset, to) into it.
 * The range itself is not inspected.  Always returns 0.
 */
static int ramdisk_prepare_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	if (PageUptodate(page))
		return 0;

	make_page_uptodate(page);
	return 0;
}
145 | ||
/*
 * ->commit_write: the data already sits in the page, so all that is left to
 * do is mark the page dirty.  The written range is not inspected.
 * Always returns 0.
 */
static int ramdisk_commit_write(struct file *file, struct page *page,
				unsigned offset, unsigned to)
{
	/* The return value of set_page_dirty() is deliberately not used. */
	set_page_dirty(page);

	return 0;
}
152 | ||
153 | /* | |
59c51591 | 154 | * ->writepage to the blockdev's mapping has to redirty the page so that the |
994fc28c | 155 | * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM |
1da177e4 LT |
156 | * won't try to (pointlessly) write the page again for a while. |
157 | * | |
158 | * Really, these pages should not be on the LRU at all. | |
159 | */ | |
160 | static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) | |
161 | { | |
162 | if (!PageUptodate(page)) | |
163 | make_page_uptodate(page); | |
164 | SetPageDirty(page); | |
165 | if (wbc->for_reclaim) | |
994fc28c | 166 | return AOP_WRITEPAGE_ACTIVATE; |
1da177e4 LT |
167 | unlock_page(page); |
168 | return 0; | |
169 | } | |
170 | ||
/*
 * ->writepages: a little speedup.  The ramdisk blockdev inode has no backing
 * store to write back to, so attempts to do so are short-circuited here:
 * nothing is done and success (0) is reported.
 */
static int ramdisk_writepages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	/* Neither argument is examined. */
	return 0;
}
180 | ||
/*
 * ramdisk blockdev pages have their own ->set_page_dirty() because we don't
 * want them to contribute to dirty memory accounting: only the page's dirty
 * flag is touched.
 *
 * Returns 1 if the page was not dirty before the call, 0 if it already was.
 */
static int ramdisk_set_page_dirty(struct page *page)
{
	return !TestSetPageDirty(page);
}
191 | ||
f5e54d6e | 192 | static const struct address_space_operations ramdisk_aops = { |
1da177e4 LT |
193 | .readpage = ramdisk_readpage, |
194 | .prepare_write = ramdisk_prepare_write, | |
195 | .commit_write = ramdisk_commit_write, | |
196 | .writepage = ramdisk_writepage, | |
197 | .set_page_dirty = ramdisk_set_page_dirty, | |
198 | .writepages = ramdisk_writepages, | |
199 | }; | |
200 | ||
201 | static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, | |
202 | struct address_space *mapping) | |
203 | { | |
204 | pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); | |
205 | unsigned int vec_offset = vec->bv_offset; | |
206 | int offset = (sector << 9) & ~PAGE_CACHE_MASK; | |
207 | int size = vec->bv_len; | |
208 | int err = 0; | |
209 | ||
210 | do { | |
211 | int count; | |
212 | struct page *page; | |
213 | char *src; | |
214 | char *dst; | |
215 | ||
216 | count = PAGE_CACHE_SIZE - offset; | |
217 | if (count > size) | |
218 | count = size; | |
219 | size -= count; | |
220 | ||
221 | page = grab_cache_page(mapping, index); | |
222 | if (!page) { | |
223 | err = -ENOMEM; | |
224 | goto out; | |
225 | } | |
226 | ||
227 | if (!PageUptodate(page)) | |
228 | make_page_uptodate(page); | |
229 | ||
230 | index++; | |
231 | ||
232 | if (rw == READ) { | |
233 | src = kmap_atomic(page, KM_USER0) + offset; | |
234 | dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; | |
235 | } else { | |
236 | src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; | |
237 | dst = kmap_atomic(page, KM_USER1) + offset; | |
238 | } | |
239 | offset = 0; | |
240 | vec_offset += count; | |
241 | ||
242 | memcpy(dst, src, count); | |
243 | ||
244 | kunmap_atomic(src, KM_USER0); | |
245 | kunmap_atomic(dst, KM_USER1); | |
246 | ||
247 | if (rw == READ) | |
248 | flush_dcache_page(vec->bv_page); | |
249 | else | |
250 | set_page_dirty(page); | |
251 | unlock_page(page); | |
252 | put_page(page); | |
253 | } while (size); | |
254 | ||
255 | out: | |
256 | return err; | |
257 | } | |
258 | ||
259 | /* | |
260 | * Basically, my strategy here is to set up a buffer-head which can't be | |
261 | * deleted, and make that my Ramdisk. If the request is outside of the | |
262 | * allocated size, we must get rid of it... | |
263 | * | |
264 | * 19-JAN-1998 Richard Gooch <rgooch@atnf.csiro.au> Added devfs support | |
265 | * | |
266 | */ | |
165125e1 | 267 | static int rd_make_request(struct request_queue *q, struct bio *bio) |
1da177e4 LT |
268 | { |
269 | struct block_device *bdev = bio->bi_bdev; | |
270 | struct address_space * mapping = bdev->bd_inode->i_mapping; | |
271 | sector_t sector = bio->bi_sector; | |
272 | unsigned long len = bio->bi_size >> 9; | |
273 | int rw = bio_data_dir(bio); | |
274 | struct bio_vec *bvec; | |
275 | int ret = 0, i; | |
276 | ||
277 | if (sector + len > get_capacity(bdev->bd_disk)) | |
278 | goto fail; | |
279 | ||
280 | if (rw==READA) | |
281 | rw=READ; | |
282 | ||
283 | bio_for_each_segment(bvec, bio, i) { | |
284 | ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping); | |
285 | sector += bvec->bv_len >> 9; | |
286 | } | |
287 | if (ret) | |
288 | goto fail; | |
289 | ||
290 | bio_endio(bio, bio->bi_size, 0); | |
291 | return 0; | |
292 | fail: | |
293 | bio_io_error(bio, bio->bi_size); | |
294 | return 0; | |
295 | } | |
296 | ||
297 | static int rd_ioctl(struct inode *inode, struct file *file, | |
298 | unsigned int cmd, unsigned long arg) | |
299 | { | |
300 | int error; | |
301 | struct block_device *bdev = inode->i_bdev; | |
302 | ||
303 | if (cmd != BLKFLSBUF) | |
304 | return -ENOTTY; | |
305 | ||
306 | /* | |
307 | * special: we want to release the ramdisk memory, it's not like with | |
308 | * the other blockdevices where this ioctl only flushes away the buffer | |
309 | * cache | |
310 | */ | |
311 | error = -EBUSY; | |
c039e313 | 312 | mutex_lock(&bdev->bd_mutex); |
1da177e4 LT |
313 | if (bdev->bd_openers <= 2) { |
314 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | |
315 | error = 0; | |
316 | } | |
c039e313 | 317 | mutex_unlock(&bdev->bd_mutex); |
1da177e4 LT |
318 | return error; |
319 | } | |
320 | ||
321 | /* | |
322 | * This is the backing_dev_info for the blockdev inode itself. It doesn't need | |
323 | * writeback and it does not contribute to dirty memory accounting. | |
324 | */ | |
325 | static struct backing_dev_info rd_backing_dev_info = { | |
326 | .ra_pages = 0, /* No readahead */ | |
327 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY, | |
328 | .unplug_io_fn = default_unplug_io_fn, | |
329 | }; | |
330 | ||
331 | /* | |
332 | * This is the backing_dev_info for the files which live atop the ramdisk | |
333 | * "device". These files do need writeback and they do contribute to dirty | |
334 | * memory accounting. | |
335 | */ | |
336 | static struct backing_dev_info rd_file_backing_dev_info = { | |
337 | .ra_pages = 0, /* No readahead */ | |
338 | .capabilities = BDI_CAP_MAP_COPY, /* Does contribute to dirty memory */ | |
339 | .unplug_io_fn = default_unplug_io_fn, | |
340 | }; | |
341 | ||
342 | static int rd_open(struct inode *inode, struct file *filp) | |
343 | { | |
344 | unsigned unit = iminor(inode); | |
345 | ||
346 | if (rd_bdev[unit] == NULL) { | |
347 | struct block_device *bdev = inode->i_bdev; | |
348 | struct address_space *mapping; | |
349 | unsigned bsize; | |
b4e3ca1a | 350 | gfp_t gfp_mask; |
1da177e4 LT |
351 | |
352 | inode = igrab(bdev->bd_inode); | |
353 | rd_bdev[unit] = bdev; | |
354 | bdev->bd_openers++; | |
355 | bsize = bdev_hardsect_size(bdev); | |
356 | bdev->bd_block_size = bsize; | |
357 | inode->i_blkbits = blksize_bits(bsize); | |
358 | inode->i_size = get_capacity(bdev->bd_disk)<<9; | |
359 | ||
360 | mapping = inode->i_mapping; | |
361 | mapping->a_ops = &ramdisk_aops; | |
362 | mapping->backing_dev_info = &rd_backing_dev_info; | |
363 | bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info; | |
364 | ||
365 | /* | |
366 | * Deep badness. rd_blkdev_pagecache_IO() needs to allocate | |
367 | * pagecache pages within a request_fn. We cannot recur back | |
368 | * into the filesytem which is mounted atop the ramdisk, because | |
369 | * that would deadlock on fs locks. And we really don't want | |
370 | * to reenter rd_blkdev_pagecache_IO when we're already within | |
371 | * that function. | |
372 | * | |
373 | * So we turn off __GFP_FS and __GFP_IO. | |
374 | * | |
375 | * And to give this thing a hope of working, turn on __GFP_HIGH. | |
376 | * Hopefully, there's enough regular memory allocation going on | |
377 | * for the page allocator emergency pools to keep the ramdisk | |
378 | * driver happy. | |
379 | */ | |
380 | gfp_mask = mapping_gfp_mask(mapping); | |
381 | gfp_mask &= ~(__GFP_FS|__GFP_IO); | |
382 | gfp_mask |= __GFP_HIGH; | |
383 | mapping_set_gfp_mask(mapping, gfp_mask); | |
384 | } | |
385 | ||
386 | return 0; | |
387 | } | |
388 | ||
389 | static struct block_device_operations rd_bd_op = { | |
390 | .owner = THIS_MODULE, | |
391 | .open = rd_open, | |
392 | .ioctl = rd_ioctl, | |
393 | }; | |
394 | ||
395 | /* | |
396 | * Before freeing the module, invalidate all of the protected buffers! | |
397 | */ | |
398 | static void __exit rd_cleanup(void) | |
399 | { | |
400 | int i; | |
401 | ||
402 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
403 | struct block_device *bdev = rd_bdev[i]; | |
404 | rd_bdev[i] = NULL; | |
405 | if (bdev) { | |
f98393a6 | 406 | invalidate_bdev(bdev); |
1da177e4 LT |
407 | blkdev_put(bdev); |
408 | } | |
409 | del_gendisk(rd_disks[i]); | |
410 | put_disk(rd_disks[i]); | |
411 | blk_cleanup_queue(rd_queue[i]); | |
412 | } | |
1da177e4 LT |
413 | unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); |
414 | } | |
415 | ||
416 | /* | |
417 | * This is the registration and initialization section of the RAM disk driver | |
418 | */ | |
419 | static int __init rd_init(void) | |
420 | { | |
421 | int i; | |
422 | int err = -ENOMEM; | |
423 | ||
424 | if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 || | |
425 | (rd_blocksize & (rd_blocksize-1))) { | |
426 | printk("RAMDISK: wrong blocksize %d, reverting to defaults\n", | |
427 | rd_blocksize); | |
428 | rd_blocksize = BLOCK_SIZE; | |
429 | } | |
430 | ||
431 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { | |
432 | rd_disks[i] = alloc_disk(1); | |
433 | if (!rd_disks[i]) | |
434 | goto out; | |
ea6f94df AM |
435 | |
436 | rd_queue[i] = blk_alloc_queue(GFP_KERNEL); | |
437 | if (!rd_queue[i]) { | |
438 | put_disk(rd_disks[i]); | |
439 | goto out; | |
440 | } | |
1da177e4 LT |
441 | } |
442 | ||
443 | if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) { | |
444 | err = -EIO; | |
445 | goto out; | |
446 | } | |
447 | ||
1da177e4 LT |
448 | for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) { |
449 | struct gendisk *disk = rd_disks[i]; | |
450 | ||
1da177e4 LT |
451 | blk_queue_make_request(rd_queue[i], &rd_make_request); |
452 | blk_queue_hardsect_size(rd_queue[i], rd_blocksize); | |
453 | ||
454 | /* rd_size is given in kB */ | |
455 | disk->major = RAMDISK_MAJOR; | |
456 | disk->first_minor = i; | |
457 | disk->fops = &rd_bd_op; | |
458 | disk->queue = rd_queue[i]; | |
459 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | |
460 | sprintf(disk->disk_name, "ram%d", i); | |
1da177e4 LT |
461 | set_capacity(disk, rd_size * 2); |
462 | add_disk(rd_disks[i]); | |
463 | } | |
464 | ||
465 | /* rd_size is given in kB */ | |
466 | printk("RAMDISK driver initialized: " | |
467 | "%d RAM disks of %dK size %d blocksize\n", | |
468 | CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize); | |
469 | ||
470 | return 0; | |
1da177e4 LT |
471 | out: |
472 | while (i--) { | |
473 | put_disk(rd_disks[i]); | |
474 | blk_cleanup_queue(rd_queue[i]); | |
475 | } | |
476 | return err; | |
477 | } | |
478 | ||
479 | module_init(rd_init); | |
480 | module_exit(rd_cleanup); | |
481 | ||
482 | /* options - nonmodular */ | |
483 | #ifndef MODULE | |
484 | static int __init ramdisk_size(char *str) | |
485 | { | |
486 | rd_size = simple_strtol(str,NULL,0); | |
487 | return 1; | |
488 | } | |
489 | static int __init ramdisk_size2(char *str) /* kludge */ | |
490 | { | |
491 | return ramdisk_size(str); | |
492 | } | |
493 | static int __init ramdisk_blocksize(char *str) | |
494 | { | |
495 | rd_blocksize = simple_strtol(str,NULL,0); | |
496 | return 1; | |
497 | } | |
498 | __setup("ramdisk=", ramdisk_size); | |
499 | __setup("ramdisk_size=", ramdisk_size2); | |
500 | __setup("ramdisk_blocksize=", ramdisk_blocksize); | |
501 | #endif | |
502 | ||
503 | /* options - modular */ | |
504 | module_param(rd_size, int, 0); | |
505 | MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); | |
506 | module_param(rd_blocksize, int, 0); | |
507 | MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes."); | |
508 | MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR); | |
509 | ||
510 | MODULE_LICENSE("GPL"); |