Commit | Line | Data |
---|---|---|
306b0c95 | 1 | /* |
f1e3cfff | 2 | * Compressed RAM block device |
306b0c95 | 3 | * |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
7bfb3de8 | 5 | * 2012, 2013 Minchan Kim |
306b0c95 NG |
6 | * |
7 | * This code is released using a dual license strategy: BSD/GPL | |
8 | * You can choose the licence that better fits your requirements. | |
9 | * | |
10 | * Released under the terms of 3-clause BSD License | |
11 | * Released under the terms of GNU General Public License Version 2.0 | |
12 | * | |
306b0c95 NG |
13 | */ |
14 | ||
f1e3cfff | 15 | #define KMSG_COMPONENT "zram" |
306b0c95 NG |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
8946a086 | 20 | #include <linux/bio.h> |
306b0c95 NG |
21 | #include <linux/bitops.h> |
22 | #include <linux/blkdev.h> | |
23 | #include <linux/buffer_head.h> | |
24 | #include <linux/device.h> | |
25 | #include <linux/genhd.h> | |
26 | #include <linux/highmem.h> | |
5a0e3ad6 | 27 | #include <linux/slab.h> |
b09ab054 | 28 | #include <linux/backing-dev.h> |
306b0c95 | 29 | #include <linux/string.h> |
306b0c95 | 30 | #include <linux/vmalloc.h> |
fcfa8d95 | 31 | #include <linux/err.h> |
85508ec6 | 32 | #include <linux/idr.h> |
6566d1a3 | 33 | #include <linux/sysfs.h> |
c0265342 | 34 | #include <linux/debugfs.h> |
1dd6c834 | 35 | #include <linux/cpuhotplug.h> |
306b0c95 | 36 | |
16a4bfb9 | 37 | #include "zram_drv.h" |
306b0c95 | 38 | |
85508ec6 | 39 | static DEFINE_IDR(zram_index_idr); |
6566d1a3 SS |
40 | /* idr index must be protected */ |
41 | static DEFINE_MUTEX(zram_index_mutex); | |
42 | ||
f1e3cfff | 43 | static int zram_major; |
b7ca232e | 44 | static const char *default_compressor = "lzo"; |
306b0c95 | 45 | |
306b0c95 | 46 | /* Module params (documentation at end) */ |
ca3d70bd | 47 | static unsigned int num_devices = 1; |
60f5921a SS |
48 | /* |
49 | * Pages that compress to sizes equals or greater than this are stored | |
50 | * uncompressed in memory. | |
51 | */ | |
52 | static size_t huge_class_size; | |
33863c21 | 53 | |
1f7319c7 MK |
54 | static void zram_free_page(struct zram *zram, size_t index); |
55 | ||
c4d6c4cc MK |
56 | static void zram_slot_lock(struct zram *zram, u32 index) |
57 | { | |
58 | bit_spin_lock(ZRAM_LOCK, &zram->table[index].value); | |
59 | } | |
60 | ||
61 | static void zram_slot_unlock(struct zram *zram, u32 index) | |
62 | { | |
63 | bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value); | |
64 | } | |
65 | ||
08eee69f | 66 | static inline bool init_done(struct zram *zram) |
be2d1d56 | 67 | { |
08eee69f | 68 | return zram->disksize; |
be2d1d56 SS |
69 | } |
70 | ||
c0265342 MK |
71 | static inline bool zram_allocated(struct zram *zram, u32 index) |
72 | { | |
73 | ||
74 | return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) || | |
75 | zram->table[index].handle; | |
76 | } | |
77 | ||
9b3bb7ab SS |
78 | static inline struct zram *dev_to_zram(struct device *dev) |
79 | { | |
80 | return (struct zram *)dev_to_disk(dev)->private_data; | |
81 | } | |
82 | ||
643ae61d MK |
83 | static unsigned long zram_get_handle(struct zram *zram, u32 index) |
84 | { | |
85 | return zram->table[index].handle; | |
86 | } | |
87 | ||
88 | static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) | |
89 | { | |
90 | zram->table[index].handle = handle; | |
91 | } | |
92 | ||
b31177f2 | 93 | /* flag operations require table entry bit_spin_lock() being held */ |
c0265342 | 94 | static bool zram_test_flag(struct zram *zram, u32 index, |
522698d7 | 95 | enum zram_pageflags flag) |
99ebbd30 | 96 | { |
beb6602c | 97 | return zram->table[index].value & BIT(flag); |
522698d7 | 98 | } |
99ebbd30 | 99 | |
beb6602c | 100 | static void zram_set_flag(struct zram *zram, u32 index, |
522698d7 SS |
101 | enum zram_pageflags flag) |
102 | { | |
beb6602c | 103 | zram->table[index].value |= BIT(flag); |
522698d7 | 104 | } |
99ebbd30 | 105 | |
beb6602c | 106 | static void zram_clear_flag(struct zram *zram, u32 index, |
522698d7 SS |
107 | enum zram_pageflags flag) |
108 | { | |
beb6602c | 109 | zram->table[index].value &= ~BIT(flag); |
522698d7 | 110 | } |
99ebbd30 | 111 | |
beb6602c | 112 | static inline void zram_set_element(struct zram *zram, u32 index, |
8e19d540 | 113 | unsigned long element) |
114 | { | |
beb6602c | 115 | zram->table[index].element = element; |
8e19d540 | 116 | } |
117 | ||
643ae61d | 118 | static unsigned long zram_get_element(struct zram *zram, u32 index) |
8e19d540 | 119 | { |
643ae61d | 120 | return zram->table[index].element; |
8e19d540 | 121 | } |
122 | ||
beb6602c | 123 | static size_t zram_get_obj_size(struct zram *zram, u32 index) |
522698d7 | 124 | { |
beb6602c | 125 | return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); |
99ebbd30 AM |
126 | } |
127 | ||
beb6602c | 128 | static void zram_set_obj_size(struct zram *zram, |
522698d7 | 129 | u32 index, size_t size) |
9b3bb7ab | 130 | { |
beb6602c | 131 | unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT; |
9b3bb7ab | 132 | |
beb6602c | 133 | zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; |
522698d7 SS |
134 | } |
135 | ||
1f7319c7 | 136 | #if PAGE_SIZE != 4096 |
1c53e0d2 | 137 | static inline bool is_partial_io(struct bio_vec *bvec) |
522698d7 SS |
138 | { |
139 | return bvec->bv_len != PAGE_SIZE; | |
140 | } | |
1f7319c7 MK |
141 | #else |
142 | static inline bool is_partial_io(struct bio_vec *bvec) | |
143 | { | |
144 | return false; | |
145 | } | |
146 | #endif | |
522698d7 SS |
147 | |
148 | /* | |
149 | * Check if request is within bounds and aligned on zram logical blocks. | |
150 | */ | |
1c53e0d2 | 151 | static inline bool valid_io_request(struct zram *zram, |
522698d7 SS |
152 | sector_t start, unsigned int size) |
153 | { | |
154 | u64 end, bound; | |
155 | ||
156 | /* unaligned request */ | |
157 | if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) | |
1c53e0d2 | 158 | return false; |
522698d7 | 159 | if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) |
1c53e0d2 | 160 | return false; |
522698d7 SS |
161 | |
162 | end = start + (size >> SECTOR_SHIFT); | |
163 | bound = zram->disksize >> SECTOR_SHIFT; | |
164 | /* out of range range */ | |
165 | if (unlikely(start >= bound || end > bound || start > end)) | |
1c53e0d2 | 166 | return false; |
522698d7 SS |
167 | |
168 | /* I/O request is valid */ | |
1c53e0d2 | 169 | return true; |
522698d7 SS |
170 | } |
171 | ||
172 | static void update_position(u32 *index, int *offset, struct bio_vec *bvec) | |
173 | { | |
e86942c7 | 174 | *index += (*offset + bvec->bv_len) / PAGE_SIZE; |
522698d7 SS |
175 | *offset = (*offset + bvec->bv_len) % PAGE_SIZE; |
176 | } | |
177 | ||
178 | static inline void update_used_max(struct zram *zram, | |
179 | const unsigned long pages) | |
180 | { | |
181 | unsigned long old_max, cur_max; | |
182 | ||
183 | old_max = atomic_long_read(&zram->stats.max_used_pages); | |
184 | ||
185 | do { | |
186 | cur_max = old_max; | |
187 | if (pages > cur_max) | |
188 | old_max = atomic_long_cmpxchg( | |
189 | &zram->stats.max_used_pages, cur_max, pages); | |
190 | } while (old_max != cur_max); | |
191 | } | |
192 | ||
/*
 * Fill @len bytes at @ptr with the word pattern @value. @len is expected
 * to be a multiple of sizeof(unsigned long); a one-time warning fires if
 * it is not.
 */
static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	unsigned long nr_words = len / sizeof(unsigned long);

	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, nr_words);
}
199 | ||
200 | static bool page_same_filled(void *ptr, unsigned long *element) | |
522698d7 SS |
201 | { |
202 | unsigned int pos; | |
203 | unsigned long *page; | |
f0fe9984 | 204 | unsigned long val; |
522698d7 SS |
205 | |
206 | page = (unsigned long *)ptr; | |
f0fe9984 | 207 | val = page[0]; |
522698d7 | 208 | |
f0fe9984 SP |
209 | for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { |
210 | if (val != page[pos]) | |
1c53e0d2 | 211 | return false; |
522698d7 SS |
212 | } |
213 | ||
f0fe9984 | 214 | *element = val; |
8e19d540 | 215 | |
1c53e0d2 | 216 | return true; |
522698d7 SS |
217 | } |
218 | ||
9b3bb7ab SS |
219 | static ssize_t initstate_show(struct device *dev, |
220 | struct device_attribute *attr, char *buf) | |
221 | { | |
a68eb3b6 | 222 | u32 val; |
9b3bb7ab SS |
223 | struct zram *zram = dev_to_zram(dev); |
224 | ||
a68eb3b6 SS |
225 | down_read(&zram->init_lock); |
226 | val = init_done(zram); | |
227 | up_read(&zram->init_lock); | |
9b3bb7ab | 228 | |
56b4e8cb | 229 | return scnprintf(buf, PAGE_SIZE, "%u\n", val); |
9b3bb7ab SS |
230 | } |
231 | ||
522698d7 SS |
232 | static ssize_t disksize_show(struct device *dev, |
233 | struct device_attribute *attr, char *buf) | |
234 | { | |
235 | struct zram *zram = dev_to_zram(dev); | |
236 | ||
237 | return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); | |
238 | } | |
239 | ||
9ada9da9 MK |
240 | static ssize_t mem_limit_store(struct device *dev, |
241 | struct device_attribute *attr, const char *buf, size_t len) | |
242 | { | |
243 | u64 limit; | |
244 | char *tmp; | |
245 | struct zram *zram = dev_to_zram(dev); | |
246 | ||
247 | limit = memparse(buf, &tmp); | |
248 | if (buf == tmp) /* no chars parsed, invalid input */ | |
249 | return -EINVAL; | |
250 | ||
251 | down_write(&zram->init_lock); | |
252 | zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | |
253 | up_write(&zram->init_lock); | |
254 | ||
255 | return len; | |
256 | } | |
257 | ||
461a8eee MK |
258 | static ssize_t mem_used_max_store(struct device *dev, |
259 | struct device_attribute *attr, const char *buf, size_t len) | |
260 | { | |
261 | int err; | |
262 | unsigned long val; | |
263 | struct zram *zram = dev_to_zram(dev); | |
461a8eee MK |
264 | |
265 | err = kstrtoul(buf, 10, &val); | |
266 | if (err || val != 0) | |
267 | return -EINVAL; | |
268 | ||
269 | down_read(&zram->init_lock); | |
5a99e95b | 270 | if (init_done(zram)) { |
461a8eee | 271 | atomic_long_set(&zram->stats.max_used_pages, |
beb6602c | 272 | zs_get_total_pages(zram->mem_pool)); |
5a99e95b | 273 | } |
461a8eee MK |
274 | up_read(&zram->init_lock); |
275 | ||
276 | return len; | |
277 | } | |
278 | ||
013bf95a MK |
279 | #ifdef CONFIG_ZRAM_WRITEBACK |
280 | static bool zram_wb_enabled(struct zram *zram) | |
281 | { | |
282 | return zram->backing_dev; | |
283 | } | |
284 | ||
285 | static void reset_bdev(struct zram *zram) | |
286 | { | |
287 | struct block_device *bdev; | |
288 | ||
289 | if (!zram_wb_enabled(zram)) | |
290 | return; | |
291 | ||
292 | bdev = zram->bdev; | |
293 | if (zram->old_block_size) | |
294 | set_blocksize(bdev, zram->old_block_size); | |
295 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
296 | /* hope filp_close flush all of IO */ | |
297 | filp_close(zram->backing_dev, NULL); | |
298 | zram->backing_dev = NULL; | |
299 | zram->old_block_size = 0; | |
300 | zram->bdev = NULL; | |
4f7a7bea MK |
301 | zram->disk->queue->backing_dev_info->capabilities |= |
302 | BDI_CAP_SYNCHRONOUS_IO; | |
1363d466 MK |
303 | kvfree(zram->bitmap); |
304 | zram->bitmap = NULL; | |
013bf95a MK |
305 | } |
306 | ||
307 | static ssize_t backing_dev_show(struct device *dev, | |
308 | struct device_attribute *attr, char *buf) | |
309 | { | |
310 | struct zram *zram = dev_to_zram(dev); | |
311 | struct file *file = zram->backing_dev; | |
312 | char *p; | |
313 | ssize_t ret; | |
314 | ||
315 | down_read(&zram->init_lock); | |
316 | if (!zram_wb_enabled(zram)) { | |
317 | memcpy(buf, "none\n", 5); | |
318 | up_read(&zram->init_lock); | |
319 | return 5; | |
320 | } | |
321 | ||
322 | p = file_path(file, buf, PAGE_SIZE - 1); | |
323 | if (IS_ERR(p)) { | |
324 | ret = PTR_ERR(p); | |
325 | goto out; | |
326 | } | |
327 | ||
328 | ret = strlen(p); | |
329 | memmove(buf, p, ret); | |
330 | buf[ret++] = '\n'; | |
331 | out: | |
332 | up_read(&zram->init_lock); | |
333 | return ret; | |
334 | } | |
335 | ||
336 | static ssize_t backing_dev_store(struct device *dev, | |
337 | struct device_attribute *attr, const char *buf, size_t len) | |
338 | { | |
339 | char *file_name; | |
c8bd134a | 340 | size_t sz; |
013bf95a MK |
341 | struct file *backing_dev = NULL; |
342 | struct inode *inode; | |
343 | struct address_space *mapping; | |
1363d466 MK |
344 | unsigned int bitmap_sz, old_block_size = 0; |
345 | unsigned long nr_pages, *bitmap = NULL; | |
013bf95a MK |
346 | struct block_device *bdev = NULL; |
347 | int err; | |
348 | struct zram *zram = dev_to_zram(dev); | |
349 | ||
350 | file_name = kmalloc(PATH_MAX, GFP_KERNEL); | |
351 | if (!file_name) | |
352 | return -ENOMEM; | |
353 | ||
354 | down_write(&zram->init_lock); | |
355 | if (init_done(zram)) { | |
356 | pr_info("Can't setup backing device for initialized device\n"); | |
357 | err = -EBUSY; | |
358 | goto out; | |
359 | } | |
360 | ||
c8bd134a PK |
361 | strlcpy(file_name, buf, PATH_MAX); |
362 | /* ignore trailing newline */ | |
363 | sz = strlen(file_name); | |
364 | if (sz > 0 && file_name[sz - 1] == '\n') | |
365 | file_name[sz - 1] = 0x00; | |
013bf95a MK |
366 | |
367 | backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); | |
368 | if (IS_ERR(backing_dev)) { | |
369 | err = PTR_ERR(backing_dev); | |
370 | backing_dev = NULL; | |
371 | goto out; | |
372 | } | |
373 | ||
374 | mapping = backing_dev->f_mapping; | |
375 | inode = mapping->host; | |
376 | ||
377 | /* Support only block device in this moment */ | |
378 | if (!S_ISBLK(inode->i_mode)) { | |
379 | err = -ENOTBLK; | |
380 | goto out; | |
381 | } | |
382 | ||
383 | bdev = bdgrab(I_BDEV(inode)); | |
384 | err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); | |
385 | if (err < 0) | |
386 | goto out; | |
387 | ||
1363d466 MK |
388 | nr_pages = i_size_read(inode) >> PAGE_SHIFT; |
389 | bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); | |
390 | bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); | |
391 | if (!bitmap) { | |
392 | err = -ENOMEM; | |
393 | goto out; | |
394 | } | |
395 | ||
013bf95a MK |
396 | old_block_size = block_size(bdev); |
397 | err = set_blocksize(bdev, PAGE_SIZE); | |
398 | if (err) | |
399 | goto out; | |
400 | ||
401 | reset_bdev(zram); | |
1363d466 | 402 | spin_lock_init(&zram->bitmap_lock); |
013bf95a MK |
403 | |
404 | zram->old_block_size = old_block_size; | |
405 | zram->bdev = bdev; | |
406 | zram->backing_dev = backing_dev; | |
1363d466 MK |
407 | zram->bitmap = bitmap; |
408 | zram->nr_pages = nr_pages; | |
4f7a7bea MK |
409 | /* |
410 | * With writeback feature, zram does asynchronous IO so it's no longer | |
411 | * synchronous device so let's remove synchronous io flag. Othewise, | |
412 | * upper layer(e.g., swap) could wait IO completion rather than | |
413 | * (submit and return), which will cause system sluggish. | |
414 | * Furthermore, when the IO function returns(e.g., swap_readpage), | |
415 | * upper layer expects IO was done so it could deallocate the page | |
416 | * freely but in fact, IO is going on so finally could cause | |
417 | * use-after-free when the IO is really done. | |
418 | */ | |
419 | zram->disk->queue->backing_dev_info->capabilities &= | |
420 | ~BDI_CAP_SYNCHRONOUS_IO; | |
013bf95a MK |
421 | up_write(&zram->init_lock); |
422 | ||
423 | pr_info("setup backing device %s\n", file_name); | |
424 | kfree(file_name); | |
425 | ||
426 | return len; | |
427 | out: | |
1363d466 MK |
428 | if (bitmap) |
429 | kvfree(bitmap); | |
430 | ||
013bf95a MK |
431 | if (bdev) |
432 | blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
433 | ||
434 | if (backing_dev) | |
435 | filp_close(backing_dev, NULL); | |
436 | ||
437 | up_write(&zram->init_lock); | |
438 | ||
439 | kfree(file_name); | |
440 | ||
441 | return err; | |
442 | } | |
443 | ||
1363d466 MK |
444 | static unsigned long get_entry_bdev(struct zram *zram) |
445 | { | |
446 | unsigned long entry; | |
447 | ||
448 | spin_lock(&zram->bitmap_lock); | |
449 | /* skip 0 bit to confuse zram.handle = 0 */ | |
450 | entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); | |
451 | if (entry == zram->nr_pages) { | |
452 | spin_unlock(&zram->bitmap_lock); | |
453 | return 0; | |
454 | } | |
455 | ||
456 | set_bit(entry, zram->bitmap); | |
457 | spin_unlock(&zram->bitmap_lock); | |
458 | ||
459 | return entry; | |
460 | } | |
461 | ||
462 | static void put_entry_bdev(struct zram *zram, unsigned long entry) | |
463 | { | |
464 | int was_set; | |
465 | ||
466 | spin_lock(&zram->bitmap_lock); | |
467 | was_set = test_and_clear_bit(entry, zram->bitmap); | |
468 | spin_unlock(&zram->bitmap_lock); | |
469 | WARN_ON_ONCE(!was_set); | |
470 | } | |
471 | ||
384bc41f | 472 | static void zram_page_end_io(struct bio *bio) |
db8ffbd4 | 473 | { |
263663cd | 474 | struct page *page = bio_first_page_all(bio); |
db8ffbd4 MK |
475 | |
476 | page_endio(page, op_is_write(bio_op(bio)), | |
477 | blk_status_to_errno(bio->bi_status)); | |
478 | bio_put(bio); | |
479 | } | |
480 | ||
8e654f8f MK |
481 | /* |
482 | * Returns 1 if the submission is successful. | |
483 | */ | |
484 | static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, | |
485 | unsigned long entry, struct bio *parent) | |
486 | { | |
487 | struct bio *bio; | |
488 | ||
489 | bio = bio_alloc(GFP_ATOMIC, 1); | |
490 | if (!bio) | |
491 | return -ENOMEM; | |
492 | ||
493 | bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); | |
a0725ab0 | 494 | bio_set_dev(bio, zram->bdev); |
8e654f8f MK |
495 | if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { |
496 | bio_put(bio); | |
497 | return -EIO; | |
498 | } | |
499 | ||
500 | if (!parent) { | |
501 | bio->bi_opf = REQ_OP_READ; | |
502 | bio->bi_end_io = zram_page_end_io; | |
503 | } else { | |
504 | bio->bi_opf = parent->bi_opf; | |
505 | bio_chain(bio, parent); | |
506 | } | |
507 | ||
508 | submit_bio(bio); | |
509 | return 1; | |
510 | } | |
511 | ||
512 | struct zram_work { | |
513 | struct work_struct work; | |
514 | struct zram *zram; | |
515 | unsigned long entry; | |
516 | struct bio *bio; | |
517 | }; | |
518 | ||
519 | #if PAGE_SIZE != 4096 | |
520 | static void zram_sync_read(struct work_struct *work) | |
521 | { | |
522 | struct bio_vec bvec; | |
523 | struct zram_work *zw = container_of(work, struct zram_work, work); | |
524 | struct zram *zram = zw->zram; | |
525 | unsigned long entry = zw->entry; | |
526 | struct bio *bio = zw->bio; | |
527 | ||
528 | read_from_bdev_async(zram, &bvec, entry, bio); | |
529 | } | |
530 | ||
531 | /* | |
532 | * Block layer want one ->make_request_fn to be active at a time | |
533 | * so if we use chained IO with parent IO in same context, | |
534 | * it's a deadlock. To avoid, it, it uses worker thread context. | |
535 | */ | |
536 | static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, | |
537 | unsigned long entry, struct bio *bio) | |
538 | { | |
539 | struct zram_work work; | |
540 | ||
541 | work.zram = zram; | |
542 | work.entry = entry; | |
543 | work.bio = bio; | |
544 | ||
545 | INIT_WORK_ONSTACK(&work.work, zram_sync_read); | |
546 | queue_work(system_unbound_wq, &work.work); | |
547 | flush_work(&work.work); | |
548 | destroy_work_on_stack(&work.work); | |
549 | ||
550 | return 1; | |
551 | } | |
552 | #else | |
553 | static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, | |
554 | unsigned long entry, struct bio *bio) | |
555 | { | |
556 | WARN_ON(1); | |
557 | return -EIO; | |
558 | } | |
559 | #endif | |
560 | ||
561 | static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, | |
562 | unsigned long entry, struct bio *parent, bool sync) | |
563 | { | |
564 | if (sync) | |
565 | return read_from_bdev_sync(zram, bvec, entry, parent); | |
566 | else | |
567 | return read_from_bdev_async(zram, bvec, entry, parent); | |
568 | } | |
569 | ||
db8ffbd4 MK |
570 | static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, |
571 | u32 index, struct bio *parent, | |
572 | unsigned long *pentry) | |
573 | { | |
574 | struct bio *bio; | |
575 | unsigned long entry; | |
576 | ||
577 | bio = bio_alloc(GFP_ATOMIC, 1); | |
578 | if (!bio) | |
579 | return -ENOMEM; | |
580 | ||
581 | entry = get_entry_bdev(zram); | |
582 | if (!entry) { | |
583 | bio_put(bio); | |
584 | return -ENOSPC; | |
585 | } | |
586 | ||
587 | bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); | |
a0725ab0 | 588 | bio_set_dev(bio, zram->bdev); |
db8ffbd4 MK |
589 | if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, |
590 | bvec->bv_offset)) { | |
591 | bio_put(bio); | |
592 | put_entry_bdev(zram, entry); | |
593 | return -EIO; | |
594 | } | |
595 | ||
596 | if (!parent) { | |
597 | bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; | |
598 | bio->bi_end_io = zram_page_end_io; | |
599 | } else { | |
600 | bio->bi_opf = parent->bi_opf; | |
601 | bio_chain(bio, parent); | |
602 | } | |
603 | ||
604 | submit_bio(bio); | |
605 | *pentry = entry; | |
606 | ||
607 | return 0; | |
608 | } | |
609 | ||
610 | static void zram_wb_clear(struct zram *zram, u32 index) | |
611 | { | |
612 | unsigned long entry; | |
613 | ||
614 | zram_clear_flag(zram, index, ZRAM_WB); | |
615 | entry = zram_get_element(zram, index); | |
616 | zram_set_element(zram, index, 0); | |
617 | put_entry_bdev(zram, entry); | |
618 | } | |
619 | ||
013bf95a MK |
620 | #else |
621 | static bool zram_wb_enabled(struct zram *zram) { return false; } | |
622 | static inline void reset_bdev(struct zram *zram) {}; | |
db8ffbd4 MK |
623 | static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, |
624 | u32 index, struct bio *parent, | |
625 | unsigned long *pentry) | |
626 | ||
627 | { | |
628 | return -EIO; | |
629 | } | |
8e654f8f MK |
630 | |
631 | static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, | |
632 | unsigned long entry, struct bio *parent, bool sync) | |
633 | { | |
634 | return -EIO; | |
635 | } | |
db8ffbd4 | 636 | static void zram_wb_clear(struct zram *zram, u32 index) {} |
013bf95a MK |
637 | #endif |
638 | ||
c0265342 MK |
639 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING |
640 | ||
641 | static struct dentry *zram_debugfs_root; | |
642 | ||
643 | static void zram_debugfs_create(void) | |
644 | { | |
645 | zram_debugfs_root = debugfs_create_dir("zram", NULL); | |
646 | } | |
647 | ||
648 | static void zram_debugfs_destroy(void) | |
649 | { | |
650 | debugfs_remove_recursive(zram_debugfs_root); | |
651 | } | |
652 | ||
653 | static void zram_accessed(struct zram *zram, u32 index) | |
654 | { | |
655 | zram->table[index].ac_time = ktime_get_boottime(); | |
656 | } | |
657 | ||
658 | static void zram_reset_access(struct zram *zram, u32 index) | |
659 | { | |
660 | zram->table[index].ac_time = 0; | |
661 | } | |
662 | ||
663 | static ssize_t read_block_state(struct file *file, char __user *buf, | |
664 | size_t count, loff_t *ppos) | |
665 | { | |
666 | char *kbuf; | |
667 | ssize_t index, written = 0; | |
668 | struct zram *zram = file->private_data; | |
669 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | |
670 | struct timespec64 ts; | |
671 | ||
672 | kbuf = kvmalloc(count, GFP_KERNEL); | |
673 | if (!kbuf) | |
674 | return -ENOMEM; | |
675 | ||
676 | down_read(&zram->init_lock); | |
677 | if (!init_done(zram)) { | |
678 | up_read(&zram->init_lock); | |
679 | kvfree(kbuf); | |
680 | return -EINVAL; | |
681 | } | |
682 | ||
683 | for (index = *ppos; index < nr_pages; index++) { | |
684 | int copied; | |
685 | ||
686 | zram_slot_lock(zram, index); | |
687 | if (!zram_allocated(zram, index)) | |
688 | goto next; | |
689 | ||
690 | ts = ktime_to_timespec64(zram->table[index].ac_time); | |
691 | copied = snprintf(kbuf + written, count, | |
692 | "%12zd %12lld.%06lu %c%c%c\n", | |
693 | index, (s64)ts.tv_sec, | |
694 | ts.tv_nsec / NSEC_PER_USEC, | |
695 | zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', | |
696 | zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', | |
697 | zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.'); | |
698 | ||
699 | if (count < copied) { | |
700 | zram_slot_unlock(zram, index); | |
701 | break; | |
702 | } | |
703 | written += copied; | |
704 | count -= copied; | |
705 | next: | |
706 | zram_slot_unlock(zram, index); | |
707 | *ppos += 1; | |
708 | } | |
709 | ||
710 | up_read(&zram->init_lock); | |
711 | if (copy_to_user(buf, kbuf, written)) | |
712 | written = -EFAULT; | |
713 | kvfree(kbuf); | |
714 | ||
715 | return written; | |
716 | } | |
717 | ||
718 | static const struct file_operations proc_zram_block_state_op = { | |
719 | .open = simple_open, | |
720 | .read = read_block_state, | |
721 | .llseek = default_llseek, | |
722 | }; | |
723 | ||
724 | static void zram_debugfs_register(struct zram *zram) | |
725 | { | |
726 | if (!zram_debugfs_root) | |
727 | return; | |
728 | ||
729 | zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, | |
730 | zram_debugfs_root); | |
731 | debugfs_create_file("block_state", 0400, zram->debugfs_dir, | |
732 | zram, &proc_zram_block_state_op); | |
733 | } | |
734 | ||
735 | static void zram_debugfs_unregister(struct zram *zram) | |
736 | { | |
737 | debugfs_remove_recursive(zram->debugfs_dir); | |
738 | } | |
739 | #else | |
740 | static void zram_debugfs_create(void) {}; | |
741 | static void zram_debugfs_destroy(void) {}; | |
742 | static void zram_accessed(struct zram *zram, u32 index) {}; | |
743 | static void zram_reset_access(struct zram *zram, u32 index) {}; | |
744 | static void zram_debugfs_register(struct zram *zram) {}; | |
745 | static void zram_debugfs_unregister(struct zram *zram) {}; | |
746 | #endif | |
013bf95a | 747 | |
43209ea2 SS |
748 | /* |
749 | * We switched to per-cpu streams and this attr is not needed anymore. | |
750 | * However, we will keep it around for some time, because: | |
751 | * a) we may revert per-cpu streams in the future | |
752 | * b) it's visible to user space and we need to follow our 2 years | |
753 | * retirement rule; but we already have a number of 'soon to be | |
754 | * altered' attrs, so max_comp_streams need to wait for the next | |
755 | * layoff cycle. | |
756 | */ | |
522698d7 SS |
757 | static ssize_t max_comp_streams_show(struct device *dev, |
758 | struct device_attribute *attr, char *buf) | |
759 | { | |
43209ea2 | 760 | return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); |
522698d7 SS |
761 | } |
762 | ||
beca3ec7 SS |
763 | static ssize_t max_comp_streams_store(struct device *dev, |
764 | struct device_attribute *attr, const char *buf, size_t len) | |
765 | { | |
43209ea2 | 766 | return len; |
beca3ec7 SS |
767 | } |
768 | ||
e46b8a03 SS |
769 | static ssize_t comp_algorithm_show(struct device *dev, |
770 | struct device_attribute *attr, char *buf) | |
771 | { | |
772 | size_t sz; | |
773 | struct zram *zram = dev_to_zram(dev); | |
774 | ||
775 | down_read(&zram->init_lock); | |
776 | sz = zcomp_available_show(zram->compressor, buf); | |
777 | up_read(&zram->init_lock); | |
778 | ||
779 | return sz; | |
780 | } | |
781 | ||
782 | static ssize_t comp_algorithm_store(struct device *dev, | |
783 | struct device_attribute *attr, const char *buf, size_t len) | |
784 | { | |
785 | struct zram *zram = dev_to_zram(dev); | |
f357e345 | 786 | char compressor[ARRAY_SIZE(zram->compressor)]; |
4bbacd51 SS |
787 | size_t sz; |
788 | ||
415403be SS |
789 | strlcpy(compressor, buf, sizeof(compressor)); |
790 | /* ignore trailing newline */ | |
791 | sz = strlen(compressor); | |
792 | if (sz > 0 && compressor[sz - 1] == '\n') | |
793 | compressor[sz - 1] = 0x00; | |
794 | ||
795 | if (!zcomp_available_algorithm(compressor)) | |
1d5b43bf LH |
796 | return -EINVAL; |
797 | ||
e46b8a03 SS |
798 | down_write(&zram->init_lock); |
799 | if (init_done(zram)) { | |
800 | up_write(&zram->init_lock); | |
801 | pr_info("Can't change algorithm for initialized device\n"); | |
802 | return -EBUSY; | |
803 | } | |
4bbacd51 | 804 | |
f357e345 | 805 | strcpy(zram->compressor, compressor); |
e46b8a03 SS |
806 | up_write(&zram->init_lock); |
807 | return len; | |
808 | } | |
809 | ||
522698d7 SS |
810 | static ssize_t compact_store(struct device *dev, |
811 | struct device_attribute *attr, const char *buf, size_t len) | |
306b0c95 | 812 | { |
522698d7 | 813 | struct zram *zram = dev_to_zram(dev); |
306b0c95 | 814 | |
522698d7 SS |
815 | down_read(&zram->init_lock); |
816 | if (!init_done(zram)) { | |
817 | up_read(&zram->init_lock); | |
818 | return -EINVAL; | |
819 | } | |
306b0c95 | 820 | |
beb6602c | 821 | zs_compact(zram->mem_pool); |
522698d7 | 822 | up_read(&zram->init_lock); |
d2d5e762 | 823 | |
522698d7 | 824 | return len; |
d2d5e762 WY |
825 | } |
826 | ||
522698d7 SS |
827 | static ssize_t io_stat_show(struct device *dev, |
828 | struct device_attribute *attr, char *buf) | |
d2d5e762 | 829 | { |
522698d7 SS |
830 | struct zram *zram = dev_to_zram(dev); |
831 | ssize_t ret; | |
d2d5e762 | 832 | |
522698d7 SS |
833 | down_read(&zram->init_lock); |
834 | ret = scnprintf(buf, PAGE_SIZE, | |
835 | "%8llu %8llu %8llu %8llu\n", | |
836 | (u64)atomic64_read(&zram->stats.failed_reads), | |
837 | (u64)atomic64_read(&zram->stats.failed_writes), | |
838 | (u64)atomic64_read(&zram->stats.invalid_io), | |
839 | (u64)atomic64_read(&zram->stats.notify_free)); | |
840 | up_read(&zram->init_lock); | |
306b0c95 | 841 | |
522698d7 | 842 | return ret; |
9b3bb7ab SS |
843 | } |
844 | ||
522698d7 SS |
845 | static ssize_t mm_stat_show(struct device *dev, |
846 | struct device_attribute *attr, char *buf) | |
9b3bb7ab | 847 | { |
522698d7 | 848 | struct zram *zram = dev_to_zram(dev); |
7d3f3938 | 849 | struct zs_pool_stats pool_stats; |
522698d7 SS |
850 | u64 orig_size, mem_used = 0; |
851 | long max_used; | |
852 | ssize_t ret; | |
a539c72a | 853 | |
7d3f3938 SS |
854 | memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); |
855 | ||
522698d7 | 856 | down_read(&zram->init_lock); |
7d3f3938 | 857 | if (init_done(zram)) { |
beb6602c MK |
858 | mem_used = zs_get_total_pages(zram->mem_pool); |
859 | zs_pool_stats(zram->mem_pool, &pool_stats); | |
7d3f3938 | 860 | } |
9b3bb7ab | 861 | |
522698d7 SS |
862 | orig_size = atomic64_read(&zram->stats.pages_stored); |
863 | max_used = atomic_long_read(&zram->stats.max_used_pages); | |
9b3bb7ab | 864 | |
522698d7 | 865 | ret = scnprintf(buf, PAGE_SIZE, |
89e85bce | 866 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n", |
522698d7 SS |
867 | orig_size << PAGE_SHIFT, |
868 | (u64)atomic64_read(&zram->stats.compr_data_size), | |
869 | mem_used << PAGE_SHIFT, | |
870 | zram->limit_pages << PAGE_SHIFT, | |
871 | max_used << PAGE_SHIFT, | |
8e19d540 | 872 | (u64)atomic64_read(&zram->stats.same_pages), |
89e85bce MK |
873 | pool_stats.pages_compacted, |
874 | (u64)atomic64_read(&zram->stats.huge_pages)); | |
522698d7 | 875 | up_read(&zram->init_lock); |
9b3bb7ab | 876 | |
522698d7 SS |
877 | return ret; |
878 | } | |
879 | ||
623e47fc SS |
880 | static ssize_t debug_stat_show(struct device *dev, |
881 | struct device_attribute *attr, char *buf) | |
882 | { | |
883 | int version = 1; | |
884 | struct zram *zram = dev_to_zram(dev); | |
885 | ssize_t ret; | |
886 | ||
887 | down_read(&zram->init_lock); | |
888 | ret = scnprintf(buf, PAGE_SIZE, | |
889 | "version: %d\n%8llu\n", | |
890 | version, | |
891 | (u64)atomic64_read(&zram->stats.writestall)); | |
892 | up_read(&zram->init_lock); | |
893 | ||
894 | return ret; | |
895 | } | |
896 | ||
/* Read-only sysfs attributes served by the *_stat_show() handlers above. */
static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
522698d7 | 900 | |
beb6602c | 901 | static void zram_meta_free(struct zram *zram, u64 disksize) |
522698d7 SS |
902 | { |
903 | size_t num_pages = disksize >> PAGE_SHIFT; | |
904 | size_t index; | |
1fec1172 GM |
905 | |
906 | /* Free all pages that are still in this zram device */ | |
302128dc MK |
907 | for (index = 0; index < num_pages; index++) |
908 | zram_free_page(zram, index); | |
1fec1172 | 909 | |
beb6602c MK |
910 | zs_destroy_pool(zram->mem_pool); |
911 | vfree(zram->table); | |
9b3bb7ab SS |
912 | } |
913 | ||
beb6602c | 914 | static bool zram_meta_alloc(struct zram *zram, u64 disksize) |
9b3bb7ab SS |
915 | { |
916 | size_t num_pages; | |
9b3bb7ab | 917 | |
9b3bb7ab | 918 | num_pages = disksize >> PAGE_SHIFT; |
fad953ce | 919 | zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); |
beb6602c MK |
920 | if (!zram->table) |
921 | return false; | |
9b3bb7ab | 922 | |
beb6602c MK |
923 | zram->mem_pool = zs_create_pool(zram->disk->disk_name); |
924 | if (!zram->mem_pool) { | |
925 | vfree(zram->table); | |
926 | return false; | |
9b3bb7ab SS |
927 | } |
928 | ||
60f5921a SS |
929 | if (!huge_class_size) |
930 | huge_class_size = zs_huge_class_size(zram->mem_pool); | |
beb6602c | 931 | return true; |
9b3bb7ab SS |
932 | } |
933 | ||
d2d5e762 WY |
934 | /* |
935 | * To protect concurrent access to the same index entry, | |
936 | * caller should hold this table index entry's bit_spinlock to | |
937 | * indicate this index entry is accessing. | |
938 | */ | |
f1e3cfff | 939 | static void zram_free_page(struct zram *zram, size_t index) |
306b0c95 | 940 | { |
db8ffbd4 MK |
941 | unsigned long handle; |
942 | ||
d7eac6b6 MK |
943 | zram_reset_access(zram, index); |
944 | ||
89e85bce MK |
945 | if (zram_test_flag(zram, index, ZRAM_HUGE)) { |
946 | zram_clear_flag(zram, index, ZRAM_HUGE); | |
947 | atomic64_dec(&zram->stats.huge_pages); | |
948 | } | |
949 | ||
db8ffbd4 MK |
950 | if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { |
951 | zram_wb_clear(zram, index); | |
952 | atomic64_dec(&zram->stats.pages_stored); | |
953 | return; | |
954 | } | |
306b0c95 | 955 | |
8e19d540 | 956 | /* |
957 | * No memory is allocated for same element filled pages. | |
958 | * Simply clear same page flag. | |
959 | */ | |
beb6602c MK |
960 | if (zram_test_flag(zram, index, ZRAM_SAME)) { |
961 | zram_clear_flag(zram, index, ZRAM_SAME); | |
643ae61d | 962 | zram_set_element(zram, index, 0); |
8e19d540 | 963 | atomic64_dec(&zram->stats.same_pages); |
51f9f82c | 964 | atomic64_dec(&zram->stats.pages_stored); |
306b0c95 NG |
965 | return; |
966 | } | |
967 | ||
db8ffbd4 | 968 | handle = zram_get_handle(zram, index); |
8e19d540 | 969 | if (!handle) |
970 | return; | |
971 | ||
beb6602c | 972 | zs_free(zram->mem_pool, handle); |
306b0c95 | 973 | |
beb6602c | 974 | atomic64_sub(zram_get_obj_size(zram, index), |
d2d5e762 | 975 | &zram->stats.compr_data_size); |
90a7806e | 976 | atomic64_dec(&zram->stats.pages_stored); |
306b0c95 | 977 | |
643ae61d | 978 | zram_set_handle(zram, index, 0); |
beb6602c | 979 | zram_set_obj_size(zram, index, 0); |
306b0c95 NG |
980 | } |
981 | ||
8e654f8f MK |
982 | static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, |
983 | struct bio *bio, bool partial_io) | |
306b0c95 | 984 | { |
1f7319c7 | 985 | int ret; |
92967471 | 986 | unsigned long handle; |
ebaf9ab5 | 987 | unsigned int size; |
1f7319c7 | 988 | void *src, *dst; |
1f7319c7 | 989 | |
8e654f8f MK |
990 | if (zram_wb_enabled(zram)) { |
991 | zram_slot_lock(zram, index); | |
992 | if (zram_test_flag(zram, index, ZRAM_WB)) { | |
993 | struct bio_vec bvec; | |
994 | ||
995 | zram_slot_unlock(zram, index); | |
996 | ||
997 | bvec.bv_page = page; | |
998 | bvec.bv_len = PAGE_SIZE; | |
999 | bvec.bv_offset = 0; | |
1000 | return read_from_bdev(zram, &bvec, | |
1001 | zram_get_element(zram, index), | |
1002 | bio, partial_io); | |
1003 | } | |
1004 | zram_slot_unlock(zram, index); | |
1005 | } | |
1006 | ||
86c49814 | 1007 | zram_slot_lock(zram, index); |
643ae61d | 1008 | handle = zram_get_handle(zram, index); |
ae94264e MK |
1009 | if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { |
1010 | unsigned long value; | |
1011 | void *mem; | |
1012 | ||
1013 | value = handle ? zram_get_element(zram, index) : 0; | |
1014 | mem = kmap_atomic(page); | |
1015 | zram_fill_page(mem, PAGE_SIZE, value); | |
1016 | kunmap_atomic(mem); | |
1017 | zram_slot_unlock(zram, index); | |
1018 | return 0; | |
1019 | } | |
1020 | ||
beb6602c | 1021 | size = zram_get_obj_size(zram, index); |
306b0c95 | 1022 | |
beb6602c | 1023 | src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); |
ebaf9ab5 | 1024 | if (size == PAGE_SIZE) { |
1f7319c7 MK |
1025 | dst = kmap_atomic(page); |
1026 | memcpy(dst, src, PAGE_SIZE); | |
1027 | kunmap_atomic(dst); | |
1028 | ret = 0; | |
ebaf9ab5 SS |
1029 | } else { |
1030 | struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
1031 | ||
1f7319c7 MK |
1032 | dst = kmap_atomic(page); |
1033 | ret = zcomp_decompress(zstrm, src, size, dst); | |
1034 | kunmap_atomic(dst); | |
ebaf9ab5 SS |
1035 | zcomp_stream_put(zram->comp); |
1036 | } | |
beb6602c | 1037 | zs_unmap_object(zram->mem_pool, handle); |
86c49814 | 1038 | zram_slot_unlock(zram, index); |
a1dd52af | 1039 | |
8c921b2b | 1040 | /* Should NEVER happen. Return bio error if it does. */ |
1f7319c7 | 1041 | if (unlikely(ret)) |
8c921b2b | 1042 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); |
306b0c95 | 1043 | |
1f7319c7 | 1044 | return ret; |
306b0c95 NG |
1045 | } |
1046 | ||
37b51fdd | 1047 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
8e654f8f | 1048 | u32 index, int offset, struct bio *bio) |
924bd88d JM |
1049 | { |
1050 | int ret; | |
37b51fdd | 1051 | struct page *page; |
37b51fdd | 1052 | |
1f7319c7 MK |
1053 | page = bvec->bv_page; |
1054 | if (is_partial_io(bvec)) { | |
1055 | /* Use a temporary buffer to decompress the page */ | |
1056 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
1057 | if (!page) | |
1058 | return -ENOMEM; | |
924bd88d JM |
1059 | } |
1060 | ||
8e654f8f | 1061 | ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); |
1f7319c7 MK |
1062 | if (unlikely(ret)) |
1063 | goto out; | |
7e5a5104 | 1064 | |
1f7319c7 MK |
1065 | if (is_partial_io(bvec)) { |
1066 | void *dst = kmap_atomic(bvec->bv_page); | |
1067 | void *src = kmap_atomic(page); | |
37b51fdd | 1068 | |
1f7319c7 MK |
1069 | memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); |
1070 | kunmap_atomic(src); | |
1071 | kunmap_atomic(dst); | |
37b51fdd | 1072 | } |
1f7319c7 | 1073 | out: |
37b51fdd | 1074 | if (is_partial_io(bvec)) |
1f7319c7 | 1075 | __free_page(page); |
37b51fdd | 1076 | |
37b51fdd | 1077 | return ret; |
924bd88d JM |
1078 | } |
1079 | ||
db8ffbd4 MK |
1080 | static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, |
1081 | u32 index, struct bio *bio) | |
306b0c95 | 1082 | { |
ae85a807 | 1083 | int ret = 0; |
1f7319c7 | 1084 | unsigned long alloced_pages; |
da9556a2 | 1085 | unsigned long handle = 0; |
97ec7c8b MK |
1086 | unsigned int comp_len = 0; |
1087 | void *src, *dst, *mem; | |
1088 | struct zcomp_strm *zstrm; | |
1089 | struct page *page = bvec->bv_page; | |
1090 | unsigned long element = 0; | |
1091 | enum zram_pageflags flags = 0; | |
db8ffbd4 | 1092 | bool allow_wb = true; |
97ec7c8b MK |
1093 | |
1094 | mem = kmap_atomic(page); | |
1095 | if (page_same_filled(mem, &element)) { | |
1096 | kunmap_atomic(mem); | |
1097 | /* Free memory associated with this sector now. */ | |
1098 | flags = ZRAM_SAME; | |
1099 | atomic64_inc(&zram->stats.same_pages); | |
1100 | goto out; | |
1101 | } | |
1102 | kunmap_atomic(mem); | |
924bd88d | 1103 | |
da9556a2 | 1104 | compress_again: |
97ec7c8b | 1105 | zstrm = zcomp_stream_get(zram->comp); |
1f7319c7 | 1106 | src = kmap_atomic(page); |
97ec7c8b | 1107 | ret = zcomp_compress(zstrm, src, &comp_len); |
1f7319c7 | 1108 | kunmap_atomic(src); |
306b0c95 | 1109 | |
b7ca232e | 1110 | if (unlikely(ret)) { |
97ec7c8b | 1111 | zcomp_stream_put(zram->comp); |
8c921b2b | 1112 | pr_err("Compression failed! err=%d\n", ret); |
97ec7c8b | 1113 | zs_free(zram->mem_pool, handle); |
1f7319c7 | 1114 | return ret; |
8c921b2b | 1115 | } |
da9556a2 | 1116 | |
60f5921a | 1117 | if (unlikely(comp_len >= huge_class_size)) { |
89e85bce | 1118 | comp_len = PAGE_SIZE; |
db8ffbd4 MK |
1119 | if (zram_wb_enabled(zram) && allow_wb) { |
1120 | zcomp_stream_put(zram->comp); | |
1121 | ret = write_to_bdev(zram, bvec, index, bio, &element); | |
1122 | if (!ret) { | |
1123 | flags = ZRAM_WB; | |
1124 | ret = 1; | |
1125 | goto out; | |
1126 | } | |
1127 | allow_wb = false; | |
1128 | goto compress_again; | |
1129 | } | |
db8ffbd4 | 1130 | } |
a1dd52af | 1131 | |
da9556a2 SS |
1132 | /* |
1133 | * handle allocation has 2 paths: | |
1134 | * a) fast path is executed with preemption disabled (for | |
1135 | * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, | |
1136 | * since we can't sleep; | |
1137 | * b) slow path enables preemption and attempts to allocate | |
1138 | * the page with __GFP_DIRECT_RECLAIM bit set. we have to | |
1139 | * put per-cpu compression stream and, thus, to re-do | |
1140 | * the compression once handle is allocated. | |
1141 | * | |
1142 | * if we have a 'non-null' handle here then we are coming | |
1143 | * from the slow path and handle has already been allocated. | |
1144 | */ | |
1145 | if (!handle) | |
beb6602c | 1146 | handle = zs_malloc(zram->mem_pool, comp_len, |
da9556a2 SS |
1147 | __GFP_KSWAPD_RECLAIM | |
1148 | __GFP_NOWARN | | |
9bc482d3 MK |
1149 | __GFP_HIGHMEM | |
1150 | __GFP_MOVABLE); | |
fd1a30de | 1151 | if (!handle) { |
2aea8493 | 1152 | zcomp_stream_put(zram->comp); |
623e47fc | 1153 | atomic64_inc(&zram->stats.writestall); |
beb6602c | 1154 | handle = zs_malloc(zram->mem_pool, comp_len, |
9bc482d3 MK |
1155 | GFP_NOIO | __GFP_HIGHMEM | |
1156 | __GFP_MOVABLE); | |
da9556a2 SS |
1157 | if (handle) |
1158 | goto compress_again; | |
1f7319c7 | 1159 | return -ENOMEM; |
8c921b2b | 1160 | } |
9ada9da9 | 1161 | |
beb6602c | 1162 | alloced_pages = zs_get_total_pages(zram->mem_pool); |
12372755 SS |
1163 | update_used_max(zram, alloced_pages); |
1164 | ||
461a8eee | 1165 | if (zram->limit_pages && alloced_pages > zram->limit_pages) { |
97ec7c8b | 1166 | zcomp_stream_put(zram->comp); |
beb6602c | 1167 | zs_free(zram->mem_pool, handle); |
1f7319c7 MK |
1168 | return -ENOMEM; |
1169 | } | |
1170 | ||
beb6602c | 1171 | dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); |
1f7319c7 MK |
1172 | |
1173 | src = zstrm->buffer; | |
1174 | if (comp_len == PAGE_SIZE) | |
397c6066 | 1175 | src = kmap_atomic(page); |
1f7319c7 MK |
1176 | memcpy(dst, src, comp_len); |
1177 | if (comp_len == PAGE_SIZE) | |
397c6066 | 1178 | kunmap_atomic(src); |
306b0c95 | 1179 | |
2aea8493 | 1180 | zcomp_stream_put(zram->comp); |
beb6602c | 1181 | zs_unmap_object(zram->mem_pool, handle); |
4ebbe7f7 MK |
1182 | atomic64_add(comp_len, &zram->stats.compr_data_size); |
1183 | out: | |
f40ac2ae SS |
1184 | /* |
1185 | * Free memory associated with this sector | |
1186 | * before overwriting unused sectors. | |
1187 | */ | |
86c49814 | 1188 | zram_slot_lock(zram, index); |
f40ac2ae | 1189 | zram_free_page(zram, index); |
db8ffbd4 | 1190 | |
89e85bce MK |
1191 | if (comp_len == PAGE_SIZE) { |
1192 | zram_set_flag(zram, index, ZRAM_HUGE); | |
1193 | atomic64_inc(&zram->stats.huge_pages); | |
1194 | } | |
1195 | ||
db8ffbd4 MK |
1196 | if (flags) { |
1197 | zram_set_flag(zram, index, flags); | |
4ebbe7f7 | 1198 | zram_set_element(zram, index, element); |
db8ffbd4 | 1199 | } else { |
4ebbe7f7 MK |
1200 | zram_set_handle(zram, index, handle); |
1201 | zram_set_obj_size(zram, index, comp_len); | |
1202 | } | |
86c49814 | 1203 | zram_slot_unlock(zram, index); |
306b0c95 | 1204 | |
8c921b2b | 1205 | /* Update stats */ |
90a7806e | 1206 | atomic64_inc(&zram->stats.pages_stored); |
ae85a807 | 1207 | return ret; |
1f7319c7 MK |
1208 | } |
1209 | ||
1210 | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, | |
db8ffbd4 | 1211 | u32 index, int offset, struct bio *bio) |
1f7319c7 MK |
1212 | { |
1213 | int ret; | |
1214 | struct page *page = NULL; | |
1215 | void *src; | |
1216 | struct bio_vec vec; | |
1217 | ||
1218 | vec = *bvec; | |
1219 | if (is_partial_io(bvec)) { | |
1220 | void *dst; | |
1221 | /* | |
1222 | * This is a partial IO. We need to read the full page | |
1223 | * before to write the changes. | |
1224 | */ | |
1225 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
1226 | if (!page) | |
1227 | return -ENOMEM; | |
1228 | ||
8e654f8f | 1229 | ret = __zram_bvec_read(zram, page, index, bio, true); |
1f7319c7 MK |
1230 | if (ret) |
1231 | goto out; | |
1232 | ||
1233 | src = kmap_atomic(bvec->bv_page); | |
1234 | dst = kmap_atomic(page); | |
1235 | memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); | |
1236 | kunmap_atomic(dst); | |
1237 | kunmap_atomic(src); | |
1238 | ||
1239 | vec.bv_page = page; | |
1240 | vec.bv_len = PAGE_SIZE; | |
1241 | vec.bv_offset = 0; | |
1242 | } | |
1243 | ||
db8ffbd4 | 1244 | ret = __zram_bvec_write(zram, &vec, index, bio); |
924bd88d | 1245 | out: |
397c6066 | 1246 | if (is_partial_io(bvec)) |
1f7319c7 | 1247 | __free_page(page); |
924bd88d | 1248 | return ret; |
8c921b2b JM |
1249 | } |
1250 | ||
f4659d8e JK |
1251 | /* |
1252 | * zram_bio_discard - handler on discard request | |
1253 | * @index: physical block index in PAGE_SIZE units | |
1254 | * @offset: byte offset within physical block | |
1255 | */ | |
1256 | static void zram_bio_discard(struct zram *zram, u32 index, | |
1257 | int offset, struct bio *bio) | |
1258 | { | |
1259 | size_t n = bio->bi_iter.bi_size; | |
1260 | ||
1261 | /* | |
1262 | * zram manages data in physical block size units. Because logical block | |
1263 | * size isn't identical with physical block size on some arch, we | |
1264 | * could get a discard request pointing to a specific offset within a | |
1265 | * certain physical block. Although we can handle this request by | |
1266 | * reading that physiclal block and decompressing and partially zeroing | |
1267 | * and re-compressing and then re-storing it, this isn't reasonable | |
1268 | * because our intent with a discard request is to save memory. So | |
1269 | * skipping this logical block is appropriate here. | |
1270 | */ | |
1271 | if (offset) { | |
38515c73 | 1272 | if (n <= (PAGE_SIZE - offset)) |
f4659d8e JK |
1273 | return; |
1274 | ||
38515c73 | 1275 | n -= (PAGE_SIZE - offset); |
f4659d8e JK |
1276 | index++; |
1277 | } | |
1278 | ||
1279 | while (n >= PAGE_SIZE) { | |
86c49814 | 1280 | zram_slot_lock(zram, index); |
f4659d8e | 1281 | zram_free_page(zram, index); |
86c49814 | 1282 | zram_slot_unlock(zram, index); |
015254da | 1283 | atomic64_inc(&zram->stats.notify_free); |
f4659d8e JK |
1284 | index++; |
1285 | n -= PAGE_SIZE; | |
1286 | } | |
1287 | } | |
1288 | ||
ae85a807 MK |
1289 | /* |
1290 | * Returns errno if it has some problem. Otherwise return 0 or 1. | |
1291 | * Returns 0 if IO request was done synchronously | |
1292 | * Returns 1 if IO request was successfully submitted. | |
1293 | */ | |
522698d7 | 1294 | static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, |
3f289dcb | 1295 | int offset, unsigned int op, struct bio *bio) |
9b3bb7ab | 1296 | { |
522698d7 | 1297 | unsigned long start_time = jiffies; |
d62e26b3 | 1298 | struct request_queue *q = zram->disk->queue; |
9b3bb7ab | 1299 | int ret; |
9b3bb7ab | 1300 | |
ddcf35d3 | 1301 | generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT, |
522698d7 | 1302 | &zram->disk->part0); |
46a51c80 | 1303 | |
3f289dcb | 1304 | if (!op_is_write(op)) { |
522698d7 | 1305 | atomic64_inc(&zram->stats.num_reads); |
8e654f8f | 1306 | ret = zram_bvec_read(zram, bvec, index, offset, bio); |
1f7319c7 | 1307 | flush_dcache_page(bvec->bv_page); |
522698d7 SS |
1308 | } else { |
1309 | atomic64_inc(&zram->stats.num_writes); | |
db8ffbd4 | 1310 | ret = zram_bvec_write(zram, bvec, index, offset, bio); |
1b672224 | 1311 | } |
9b3bb7ab | 1312 | |
ddcf35d3 | 1313 | generic_end_io_acct(q, op, &zram->disk->part0, start_time); |
9b3bb7ab | 1314 | |
d7eac6b6 MK |
1315 | zram_slot_lock(zram, index); |
1316 | zram_accessed(zram, index); | |
1317 | zram_slot_unlock(zram, index); | |
1318 | ||
ae85a807 | 1319 | if (unlikely(ret < 0)) { |
3f289dcb | 1320 | if (!op_is_write(op)) |
522698d7 SS |
1321 | atomic64_inc(&zram->stats.failed_reads); |
1322 | else | |
1323 | atomic64_inc(&zram->stats.failed_writes); | |
1b672224 | 1324 | } |
9b3bb7ab | 1325 | |
1b672224 | 1326 | return ret; |
8c921b2b JM |
1327 | } |
1328 | ||
be257c61 | 1329 | static void __zram_make_request(struct zram *zram, struct bio *bio) |
8c921b2b | 1330 | { |
abf54548 | 1331 | int offset; |
8c921b2b | 1332 | u32 index; |
7988613b KO |
1333 | struct bio_vec bvec; |
1334 | struct bvec_iter iter; | |
8c921b2b | 1335 | |
4f024f37 KO |
1336 | index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; |
1337 | offset = (bio->bi_iter.bi_sector & | |
1338 | (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; | |
8c921b2b | 1339 | |
31edeacd CH |
1340 | switch (bio_op(bio)) { |
1341 | case REQ_OP_DISCARD: | |
1342 | case REQ_OP_WRITE_ZEROES: | |
f4659d8e | 1343 | zram_bio_discard(zram, index, offset, bio); |
4246a0b6 | 1344 | bio_endio(bio); |
f4659d8e | 1345 | return; |
31edeacd CH |
1346 | default: |
1347 | break; | |
f4659d8e JK |
1348 | } |
1349 | ||
7988613b | 1350 | bio_for_each_segment(bvec, bio, iter) { |
e86942c7 MK |
1351 | struct bio_vec bv = bvec; |
1352 | unsigned int unwritten = bvec.bv_len; | |
924bd88d | 1353 | |
e86942c7 MK |
1354 | do { |
1355 | bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, | |
1356 | unwritten); | |
abf54548 | 1357 | if (zram_bvec_rw(zram, &bv, index, offset, |
3f289dcb | 1358 | bio_op(bio), bio) < 0) |
924bd88d JM |
1359 | goto out; |
1360 | ||
e86942c7 MK |
1361 | bv.bv_offset += bv.bv_len; |
1362 | unwritten -= bv.bv_len; | |
924bd88d | 1363 | |
e86942c7 MK |
1364 | update_position(&index, &offset, &bv); |
1365 | } while (unwritten); | |
a1dd52af | 1366 | } |
306b0c95 | 1367 | |
4246a0b6 | 1368 | bio_endio(bio); |
7d7854b4 | 1369 | return; |
306b0c95 NG |
1370 | |
1371 | out: | |
306b0c95 | 1372 | bio_io_error(bio); |
306b0c95 NG |
1373 | } |
1374 | ||
306b0c95 | 1375 | /* |
f1e3cfff | 1376 | * Handler function for all zram I/O requests. |
306b0c95 | 1377 | */ |
dece1635 | 1378 | static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) |
306b0c95 | 1379 | { |
f1e3cfff | 1380 | struct zram *zram = queue->queuedata; |
306b0c95 | 1381 | |
54850e73 | 1382 | if (!valid_io_request(zram, bio->bi_iter.bi_sector, |
1383 | bio->bi_iter.bi_size)) { | |
da5cc7d3 | 1384 | atomic64_inc(&zram->stats.invalid_io); |
a09759ac | 1385 | goto error; |
6642a67c JM |
1386 | } |
1387 | ||
be257c61 | 1388 | __zram_make_request(zram, bio); |
dece1635 | 1389 | return BLK_QC_T_NONE; |
a09759ac | 1390 | |
0900beae JM |
1391 | error: |
1392 | bio_io_error(bio); | |
dece1635 | 1393 | return BLK_QC_T_NONE; |
306b0c95 NG |
1394 | } |
1395 | ||
2ccbec05 NG |
1396 | static void zram_slot_free_notify(struct block_device *bdev, |
1397 | unsigned long index) | |
107c161b | 1398 | { |
f1e3cfff | 1399 | struct zram *zram; |
107c161b | 1400 | |
f1e3cfff | 1401 | zram = bdev->bd_disk->private_data; |
a0c516cb | 1402 | |
86c49814 | 1403 | zram_slot_lock(zram, index); |
f614a9f4 | 1404 | zram_free_page(zram, index); |
86c49814 | 1405 | zram_slot_unlock(zram, index); |
f614a9f4 | 1406 | atomic64_inc(&zram->stats.notify_free); |
107c161b NG |
1407 | } |
1408 | ||
8c7f0102 | 1409 | static int zram_rw_page(struct block_device *bdev, sector_t sector, |
3f289dcb | 1410 | struct page *page, unsigned int op) |
8c7f0102 | 1411 | { |
ae85a807 | 1412 | int offset, ret; |
8c7f0102 | 1413 | u32 index; |
1414 | struct zram *zram; | |
1415 | struct bio_vec bv; | |
1416 | ||
98cc093c HY |
1417 | if (PageTransHuge(page)) |
1418 | return -ENOTSUPP; | |
8c7f0102 | 1419 | zram = bdev->bd_disk->private_data; |
08eee69f | 1420 | |
8c7f0102 | 1421 | if (!valid_io_request(zram, sector, PAGE_SIZE)) { |
1422 | atomic64_inc(&zram->stats.invalid_io); | |
ae85a807 | 1423 | ret = -EINVAL; |
a09759ac | 1424 | goto out; |
8c7f0102 | 1425 | } |
1426 | ||
1427 | index = sector >> SECTORS_PER_PAGE_SHIFT; | |
4ca82dab | 1428 | offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; |
8c7f0102 | 1429 | |
1430 | bv.bv_page = page; | |
1431 | bv.bv_len = PAGE_SIZE; | |
1432 | bv.bv_offset = 0; | |
1433 | ||
3f289dcb | 1434 | ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); |
08eee69f | 1435 | out: |
8c7f0102 | 1436 | /* |
1437 | * If I/O fails, just return error(ie, non-zero) without | |
1438 | * calling page_endio. | |
1439 | * It causes resubmit the I/O with bio request by upper functions | |
1440 | * of rw_page(e.g., swap_readpage, __swap_writepage) and | |
1441 | * bio->bi_end_io does things to handle the error | |
1442 | * (e.g., SetPageError, set_page_dirty and extra works). | |
1443 | */ | |
ae85a807 MK |
1444 | if (unlikely(ret < 0)) |
1445 | return ret; | |
1446 | ||
1447 | switch (ret) { | |
1448 | case 0: | |
3f289dcb | 1449 | page_endio(page, op_is_write(op), 0); |
ae85a807 MK |
1450 | break; |
1451 | case 1: | |
1452 | ret = 0; | |
1453 | break; | |
1454 | default: | |
1455 | WARN_ON(1); | |
1456 | } | |
1457 | return ret; | |
8c7f0102 | 1458 | } |
1459 | ||
522698d7 SS |
1460 | static void zram_reset_device(struct zram *zram) |
1461 | { | |
522698d7 SS |
1462 | struct zcomp *comp; |
1463 | u64 disksize; | |
306b0c95 | 1464 | |
522698d7 | 1465 | down_write(&zram->init_lock); |
9b3bb7ab | 1466 | |
522698d7 SS |
1467 | zram->limit_pages = 0; |
1468 | ||
1469 | if (!init_done(zram)) { | |
1470 | up_write(&zram->init_lock); | |
1471 | return; | |
1472 | } | |
1473 | ||
522698d7 SS |
1474 | comp = zram->comp; |
1475 | disksize = zram->disksize; | |
522698d7 | 1476 | zram->disksize = 0; |
522698d7 SS |
1477 | |
1478 | set_capacity(zram->disk, 0); | |
1479 | part_stat_set_all(&zram->disk->part0, 0); | |
1480 | ||
1481 | up_write(&zram->init_lock); | |
1482 | /* I/O operation under all of CPU are done so let's free */ | |
beb6602c | 1483 | zram_meta_free(zram, disksize); |
302128dc | 1484 | memset(&zram->stats, 0, sizeof(zram->stats)); |
522698d7 | 1485 | zcomp_destroy(comp); |
013bf95a | 1486 | reset_bdev(zram); |
522698d7 SS |
1487 | } |
1488 | ||
1489 | static ssize_t disksize_store(struct device *dev, | |
1490 | struct device_attribute *attr, const char *buf, size_t len) | |
2f6a3bed | 1491 | { |
522698d7 SS |
1492 | u64 disksize; |
1493 | struct zcomp *comp; | |
2f6a3bed | 1494 | struct zram *zram = dev_to_zram(dev); |
522698d7 | 1495 | int err; |
2f6a3bed | 1496 | |
522698d7 SS |
1497 | disksize = memparse(buf, NULL); |
1498 | if (!disksize) | |
1499 | return -EINVAL; | |
2f6a3bed | 1500 | |
beb6602c MK |
1501 | down_write(&zram->init_lock); |
1502 | if (init_done(zram)) { | |
1503 | pr_info("Cannot change disksize for initialized device\n"); | |
1504 | err = -EBUSY; | |
1505 | goto out_unlock; | |
1506 | } | |
1507 | ||
522698d7 | 1508 | disksize = PAGE_ALIGN(disksize); |
beb6602c MK |
1509 | if (!zram_meta_alloc(zram, disksize)) { |
1510 | err = -ENOMEM; | |
1511 | goto out_unlock; | |
1512 | } | |
522698d7 | 1513 | |
da9556a2 | 1514 | comp = zcomp_create(zram->compressor); |
522698d7 | 1515 | if (IS_ERR(comp)) { |
70864969 | 1516 | pr_err("Cannot initialise %s compressing backend\n", |
522698d7 SS |
1517 | zram->compressor); |
1518 | err = PTR_ERR(comp); | |
1519 | goto out_free_meta; | |
1520 | } | |
1521 | ||
522698d7 SS |
1522 | zram->comp = comp; |
1523 | zram->disksize = disksize; | |
1524 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); | |
e447a015 MK |
1525 | |
1526 | revalidate_disk(zram->disk); | |
e7ccfc4c | 1527 | up_write(&zram->init_lock); |
522698d7 SS |
1528 | |
1529 | return len; | |
1530 | ||
522698d7 | 1531 | out_free_meta: |
beb6602c MK |
1532 | zram_meta_free(zram, disksize); |
1533 | out_unlock: | |
1534 | up_write(&zram->init_lock); | |
522698d7 | 1535 | return err; |
2f6a3bed SS |
1536 | } |
1537 | ||
522698d7 SS |
1538 | static ssize_t reset_store(struct device *dev, |
1539 | struct device_attribute *attr, const char *buf, size_t len) | |
4f2109f6 | 1540 | { |
522698d7 SS |
1541 | int ret; |
1542 | unsigned short do_reset; | |
1543 | struct zram *zram; | |
1544 | struct block_device *bdev; | |
4f2109f6 | 1545 | |
f405c445 SS |
1546 | ret = kstrtou16(buf, 10, &do_reset); |
1547 | if (ret) | |
1548 | return ret; | |
1549 | ||
1550 | if (!do_reset) | |
1551 | return -EINVAL; | |
1552 | ||
522698d7 SS |
1553 | zram = dev_to_zram(dev); |
1554 | bdev = bdget_disk(zram->disk, 0); | |
522698d7 SS |
1555 | if (!bdev) |
1556 | return -ENOMEM; | |
4f2109f6 | 1557 | |
522698d7 | 1558 | mutex_lock(&bdev->bd_mutex); |
f405c445 SS |
1559 | /* Do not reset an active device or claimed device */ |
1560 | if (bdev->bd_openers || zram->claim) { | |
1561 | mutex_unlock(&bdev->bd_mutex); | |
1562 | bdput(bdev); | |
1563 | return -EBUSY; | |
522698d7 SS |
1564 | } |
1565 | ||
f405c445 SS |
1566 | /* From now on, anyone can't open /dev/zram[0-9] */ |
1567 | zram->claim = true; | |
1568 | mutex_unlock(&bdev->bd_mutex); | |
522698d7 | 1569 | |
f405c445 | 1570 | /* Make sure all the pending I/O are finished */ |
522698d7 SS |
1571 | fsync_bdev(bdev); |
1572 | zram_reset_device(zram); | |
e447a015 | 1573 | revalidate_disk(zram->disk); |
522698d7 SS |
1574 | bdput(bdev); |
1575 | ||
f405c445 SS |
1576 | mutex_lock(&bdev->bd_mutex); |
1577 | zram->claim = false; | |
1578 | mutex_unlock(&bdev->bd_mutex); | |
1579 | ||
522698d7 | 1580 | return len; |
f405c445 SS |
1581 | } |
1582 | ||
1583 | static int zram_open(struct block_device *bdev, fmode_t mode) | |
1584 | { | |
1585 | int ret = 0; | |
1586 | struct zram *zram; | |
1587 | ||
1588 | WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); | |
1589 | ||
1590 | zram = bdev->bd_disk->private_data; | |
1591 | /* zram was claimed to reset so open request fails */ | |
1592 | if (zram->claim) | |
1593 | ret = -EBUSY; | |
4f2109f6 SS |
1594 | |
1595 | return ret; | |
1596 | } | |
1597 | ||
522698d7 | 1598 | static const struct block_device_operations zram_devops = { |
f405c445 | 1599 | .open = zram_open, |
522698d7 SS |
1600 | .swap_slot_free_notify = zram_slot_free_notify, |
1601 | .rw_page = zram_rw_page, | |
1602 | .owner = THIS_MODULE | |
1603 | }; | |
1604 | ||
/* Per-device control attributes; handlers are the *_show()/*_store() pairs. */
static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
/* Only available when writeback support is compiled in. */
static DEVICE_ATTR_RW(backing_dev);
#endif
a68eb3b6 | 1616 | |
9b3bb7ab SS |
1617 | static struct attribute *zram_disk_attrs[] = { |
1618 | &dev_attr_disksize.attr, | |
1619 | &dev_attr_initstate.attr, | |
1620 | &dev_attr_reset.attr, | |
99ebbd30 | 1621 | &dev_attr_compact.attr, |
9ada9da9 | 1622 | &dev_attr_mem_limit.attr, |
461a8eee | 1623 | &dev_attr_mem_used_max.attr, |
beca3ec7 | 1624 | &dev_attr_max_comp_streams.attr, |
e46b8a03 | 1625 | &dev_attr_comp_algorithm.attr, |
013bf95a MK |
1626 | #ifdef CONFIG_ZRAM_WRITEBACK |
1627 | &dev_attr_backing_dev.attr, | |
1628 | #endif | |
2f6a3bed | 1629 | &dev_attr_io_stat.attr, |
4f2109f6 | 1630 | &dev_attr_mm_stat.attr, |
623e47fc | 1631 | &dev_attr_debug_stat.attr, |
9b3bb7ab SS |
1632 | NULL, |
1633 | }; | |
1634 | ||
bc1bb362 | 1635 | static const struct attribute_group zram_disk_attr_group = { |
9b3bb7ab SS |
1636 | .attrs = zram_disk_attrs, |
1637 | }; | |
1638 | ||
98af4d4d HR |
1639 | static const struct attribute_group *zram_disk_attr_groups[] = { |
1640 | &zram_disk_attr_group, | |
1641 | NULL, | |
1642 | }; | |
1643 | ||
92ff1528 SS |
1644 | /* |
1645 | * Allocate and initialize new zram device. the function returns | |
1646 | * '>= 0' device_id upon success, and negative value otherwise. | |
1647 | */ | |
1648 | static int zram_add(void) | |
306b0c95 | 1649 | { |
85508ec6 | 1650 | struct zram *zram; |
ee980160 | 1651 | struct request_queue *queue; |
92ff1528 | 1652 | int ret, device_id; |
85508ec6 SS |
1653 | |
1654 | zram = kzalloc(sizeof(struct zram), GFP_KERNEL); | |
1655 | if (!zram) | |
1656 | return -ENOMEM; | |
1657 | ||
92ff1528 | 1658 | ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); |
85508ec6 SS |
1659 | if (ret < 0) |
1660 | goto out_free_dev; | |
92ff1528 | 1661 | device_id = ret; |
de1a21a0 | 1662 | |
0900beae | 1663 | init_rwsem(&zram->init_lock); |
306b0c95 | 1664 | |
ee980160 SS |
1665 | queue = blk_alloc_queue(GFP_KERNEL); |
1666 | if (!queue) { | |
306b0c95 NG |
1667 | pr_err("Error allocating disk queue for device %d\n", |
1668 | device_id); | |
85508ec6 SS |
1669 | ret = -ENOMEM; |
1670 | goto out_free_idr; | |
306b0c95 NG |
1671 | } |
1672 | ||
ee980160 | 1673 | blk_queue_make_request(queue, zram_make_request); |
306b0c95 | 1674 | |
85508ec6 | 1675 | /* gendisk structure */ |
f1e3cfff NG |
1676 | zram->disk = alloc_disk(1); |
1677 | if (!zram->disk) { | |
70864969 | 1678 | pr_err("Error allocating disk structure for device %d\n", |
306b0c95 | 1679 | device_id); |
201c7b72 | 1680 | ret = -ENOMEM; |
39a9b8ac | 1681 | goto out_free_queue; |
306b0c95 NG |
1682 | } |
1683 | ||
f1e3cfff NG |
1684 | zram->disk->major = zram_major; |
1685 | zram->disk->first_minor = device_id; | |
1686 | zram->disk->fops = &zram_devops; | |
ee980160 SS |
1687 | zram->disk->queue = queue; |
1688 | zram->disk->queue->queuedata = zram; | |
f1e3cfff NG |
1689 | zram->disk->private_data = zram; |
1690 | snprintf(zram->disk->disk_name, 16, "zram%d", device_id); | |
306b0c95 | 1691 | |
33863c21 | 1692 | /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ |
f1e3cfff | 1693 | set_capacity(zram->disk, 0); |
b67d1ec1 | 1694 | /* zram devices sort of resembles non-rotational disks */ |
8b904b5b BVA |
1695 | blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); |
1696 | blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); | |
e447a015 | 1697 | |
a1dd52af NG |
1698 | /* |
1699 | * To ensure that we always get PAGE_SIZE aligned | |
1700 | * and n*PAGE_SIZED sized I/O requests. | |
1701 | */ | |
f1e3cfff | 1702 | blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); |
7b19b8d4 RJ |
1703 | blk_queue_logical_block_size(zram->disk->queue, |
1704 | ZRAM_LOGICAL_BLOCK_SIZE); | |
f1e3cfff NG |
1705 | blk_queue_io_min(zram->disk->queue, PAGE_SIZE); |
1706 | blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); | |
f4659d8e | 1707 | zram->disk->queue->limits.discard_granularity = PAGE_SIZE; |
2bb4cd5c | 1708 | blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); |
8b904b5b | 1709 | blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); |
31edeacd | 1710 | |
f4659d8e JK |
1711 | /* |
1712 | * zram_bio_discard() will clear all logical blocks if logical block | |
1713 | * size is identical with physical block size(PAGE_SIZE). But if it is | |
1714 | * different, we will skip discarding some parts of logical blocks in | |
1715 | * the part of the request range which isn't aligned to physical block | |
1716 | * size. So we can't ensure that all discarded logical blocks are | |
1717 | * zeroed. | |
1718 | */ | |
1719 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | |
31edeacd | 1720 | blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); |
5d83d5a0 | 1721 | |
e447a015 | 1722 | zram->disk->queue->backing_dev_info->capabilities |= |
23c47d2a | 1723 | (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); |
98af4d4d HR |
1724 | device_add_disk(NULL, zram->disk, zram_disk_attr_groups); |
1725 | ||
e46b8a03 | 1726 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
d12b63c9 | 1727 | |
c0265342 | 1728 | zram_debugfs_register(zram); |
d12b63c9 | 1729 | pr_info("Added device: %s\n", zram->disk->disk_name); |
92ff1528 | 1730 | return device_id; |
de1a21a0 | 1731 | |
39a9b8ac | 1732 | out_free_queue: |
ee980160 | 1733 | blk_cleanup_queue(queue); |
85508ec6 SS |
1734 | out_free_idr: |
1735 | idr_remove(&zram_index_idr, device_id); | |
1736 | out_free_dev: | |
1737 | kfree(zram); | |
de1a21a0 | 1738 | return ret; |
306b0c95 NG |
1739 | } |
1740 | ||
6566d1a3 | 1741 | static int zram_remove(struct zram *zram) |
306b0c95 | 1742 | { |
6566d1a3 SS |
1743 | struct block_device *bdev; |
1744 | ||
1745 | bdev = bdget_disk(zram->disk, 0); | |
1746 | if (!bdev) | |
1747 | return -ENOMEM; | |
1748 | ||
1749 | mutex_lock(&bdev->bd_mutex); | |
1750 | if (bdev->bd_openers || zram->claim) { | |
1751 | mutex_unlock(&bdev->bd_mutex); | |
1752 | bdput(bdev); | |
1753 | return -EBUSY; | |
1754 | } | |
1755 | ||
1756 | zram->claim = true; | |
1757 | mutex_unlock(&bdev->bd_mutex); | |
1758 | ||
c0265342 | 1759 | zram_debugfs_unregister(zram); |
306b0c95 | 1760 | |
6566d1a3 SS |
1761 | /* Make sure all the pending I/O are finished */ |
1762 | fsync_bdev(bdev); | |
85508ec6 | 1763 | zram_reset_device(zram); |
6566d1a3 SS |
1764 | bdput(bdev); |
1765 | ||
1766 | pr_info("Removed device: %s\n", zram->disk->disk_name); | |
1767 | ||
85508ec6 | 1768 | del_gendisk(zram->disk); |
392db380 | 1769 | blk_cleanup_queue(zram->disk->queue); |
85508ec6 SS |
1770 | put_disk(zram->disk); |
1771 | kfree(zram); | |
6566d1a3 SS |
1772 | return 0; |
1773 | } | |
1774 | ||
1775 | /* zram-control sysfs attributes */ | |
27104a53 GKH |
1776 | |
1777 | /* | |
1778 | * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a | |
1779 | * sense that reading from this file does alter the state of your system -- it | |
1780 | * creates a new un-initialized zram device and returns back this device's | |
1781 | * device_id (or an error code if it fails to create a new device). | |
1782 | */ | |
6566d1a3 SS |
1783 | static ssize_t hot_add_show(struct class *class, |
1784 | struct class_attribute *attr, | |
1785 | char *buf) | |
1786 | { | |
1787 | int ret; | |
1788 | ||
1789 | mutex_lock(&zram_index_mutex); | |
1790 | ret = zram_add(); | |
1791 | mutex_unlock(&zram_index_mutex); | |
1792 | ||
1793 | if (ret < 0) | |
1794 | return ret; | |
1795 | return scnprintf(buf, PAGE_SIZE, "%d\n", ret); | |
1796 | } | |
f40609d1 | 1797 | static CLASS_ATTR_RO(hot_add); |
6566d1a3 SS |
1798 | |
1799 | static ssize_t hot_remove_store(struct class *class, | |
1800 | struct class_attribute *attr, | |
1801 | const char *buf, | |
1802 | size_t count) | |
1803 | { | |
1804 | struct zram *zram; | |
1805 | int ret, dev_id; | |
1806 | ||
1807 | /* dev_id is gendisk->first_minor, which is `int' */ | |
1808 | ret = kstrtoint(buf, 10, &dev_id); | |
1809 | if (ret) | |
1810 | return ret; | |
1811 | if (dev_id < 0) | |
1812 | return -EINVAL; | |
1813 | ||
1814 | mutex_lock(&zram_index_mutex); | |
1815 | ||
1816 | zram = idr_find(&zram_index_idr, dev_id); | |
17ec4cd9 | 1817 | if (zram) { |
6566d1a3 | 1818 | ret = zram_remove(zram); |
529e71e1 TI |
1819 | if (!ret) |
1820 | idr_remove(&zram_index_idr, dev_id); | |
17ec4cd9 | 1821 | } else { |
6566d1a3 | 1822 | ret = -ENODEV; |
17ec4cd9 | 1823 | } |
6566d1a3 SS |
1824 | |
1825 | mutex_unlock(&zram_index_mutex); | |
1826 | return ret ? ret : count; | |
85508ec6 | 1827 | } |
27104a53 | 1828 | static CLASS_ATTR_WO(hot_remove); |
a096cafc | 1829 | |
27104a53 GKH |
1830 | static struct attribute *zram_control_class_attrs[] = { |
1831 | &class_attr_hot_add.attr, | |
1832 | &class_attr_hot_remove.attr, | |
1833 | NULL, | |
6566d1a3 | 1834 | }; |
27104a53 | 1835 | ATTRIBUTE_GROUPS(zram_control_class); |
6566d1a3 SS |
1836 | |
1837 | static struct class zram_control_class = { | |
1838 | .name = "zram-control", | |
1839 | .owner = THIS_MODULE, | |
27104a53 | 1840 | .class_groups = zram_control_class_groups, |
6566d1a3 SS |
1841 | }; |
1842 | ||
85508ec6 SS |
/*
 * idr_for_each() callback used by destroy_devices(): remove the zram device
 * stored under @id.
 *
 * @id:   idr index of the device
 * @ptr:  the struct zram registered under @id
 * @data: unused
 *
 * A failure from zram_remove() is reported instead of being silently
 * dropped. The callback still returns 0 so that the remaining devices get
 * their turn as well.
 */
static int zram_remove_cb(int id, void *ptr, void *data)
{
	int ret;

	ret = zram_remove(ptr);
	if (ret)
		pr_err("Unable to remove device %d: %d\n", id, ret);

	return 0;
}
a096cafc | 1848 | |
85508ec6 SS |
1849 | static void destroy_devices(void) |
1850 | { | |
6566d1a3 | 1851 | class_unregister(&zram_control_class); |
85508ec6 | 1852 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
c0265342 | 1853 | zram_debugfs_destroy(); |
85508ec6 | 1854 | idr_destroy(&zram_index_idr); |
a096cafc | 1855 | unregister_blkdev(zram_major, "zram"); |
1dd6c834 | 1856 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
306b0c95 NG |
1857 | } |
1858 | ||
f1e3cfff | 1859 | static int __init zram_init(void) |
306b0c95 | 1860 | { |
92ff1528 | 1861 | int ret; |
306b0c95 | 1862 | |
1dd6c834 AMG |
1863 | ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", |
1864 | zcomp_cpu_up_prepare, zcomp_cpu_dead); | |
1865 | if (ret < 0) | |
1866 | return ret; | |
1867 | ||
6566d1a3 SS |
1868 | ret = class_register(&zram_control_class); |
1869 | if (ret) { | |
70864969 | 1870 | pr_err("Unable to register zram-control class\n"); |
1dd6c834 | 1871 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
6566d1a3 SS |
1872 | return ret; |
1873 | } | |
1874 | ||
c0265342 | 1875 | zram_debugfs_create(); |
f1e3cfff NG |
1876 | zram_major = register_blkdev(0, "zram"); |
1877 | if (zram_major <= 0) { | |
70864969 | 1878 | pr_err("Unable to get major number\n"); |
6566d1a3 | 1879 | class_unregister(&zram_control_class); |
1dd6c834 | 1880 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
a096cafc | 1881 | return -EBUSY; |
306b0c95 NG |
1882 | } |
1883 | ||
92ff1528 | 1884 | while (num_devices != 0) { |
6566d1a3 | 1885 | mutex_lock(&zram_index_mutex); |
92ff1528 | 1886 | ret = zram_add(); |
6566d1a3 | 1887 | mutex_unlock(&zram_index_mutex); |
92ff1528 | 1888 | if (ret < 0) |
a096cafc | 1889 | goto out_error; |
92ff1528 | 1890 | num_devices--; |
de1a21a0 NG |
1891 | } |
1892 | ||
306b0c95 | 1893 | return 0; |
de1a21a0 | 1894 | |
a096cafc | 1895 | out_error: |
85508ec6 | 1896 | destroy_devices(); |
306b0c95 NG |
1897 | return ret; |
1898 | } | |
1899 | ||
f1e3cfff | 1900 | static void __exit zram_exit(void) |
306b0c95 | 1901 | { |
85508ec6 | 1902 | destroy_devices(); |
306b0c95 NG |
1903 | } |
1904 | ||
f1e3cfff NG |
1905 | module_init(zram_init); |
1906 | module_exit(zram_exit); | |
306b0c95 | 1907 | |
9b3bb7ab | 1908 | module_param(num_devices, uint, 0); |
c3cdb40e | 1909 | MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); |
9b3bb7ab | 1910 | |
306b0c95 NG |
1911 | MODULE_LICENSE("Dual BSD/GPL"); |
1912 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); | |
f1e3cfff | 1913 | MODULE_DESCRIPTION("Compressed RAM Block Device"); |