Commit | Line | Data |
---|---|---|
306b0c95 | 1 | /* |
f1e3cfff | 2 | * Compressed RAM block device |
306b0c95 | 3 | * |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
7bfb3de8 | 5 | * 2012, 2013 Minchan Kim |
306b0c95 NG |
6 | * |
7 | * This code is released using a dual license strategy: BSD/GPL | |
8 | You can choose the license that better fits your requirements. | |
9 | * | |
10 | * Released under the terms of 3-clause BSD License | |
11 | * Released under the terms of GNU General Public License Version 2.0 | |
12 | * | |
306b0c95 NG |
13 | */ |
14 | ||
f1e3cfff | 15 | #define KMSG_COMPONENT "zram" |
306b0c95 NG |
16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
8946a086 | 20 | #include <linux/bio.h> |
306b0c95 NG |
21 | #include <linux/bitops.h> |
22 | #include <linux/blkdev.h> | |
23 | #include <linux/buffer_head.h> | |
24 | #include <linux/device.h> | |
25 | #include <linux/genhd.h> | |
26 | #include <linux/highmem.h> | |
5a0e3ad6 | 27 | #include <linux/slab.h> |
b09ab054 | 28 | #include <linux/backing-dev.h> |
306b0c95 | 29 | #include <linux/string.h> |
306b0c95 | 30 | #include <linux/vmalloc.h> |
fcfa8d95 | 31 | #include <linux/err.h> |
85508ec6 | 32 | #include <linux/idr.h> |
6566d1a3 | 33 | #include <linux/sysfs.h> |
c0265342 | 34 | #include <linux/debugfs.h> |
1dd6c834 | 35 | #include <linux/cpuhotplug.h> |
306b0c95 | 36 | |
16a4bfb9 | 37 | #include "zram_drv.h" |
306b0c95 | 38 | |
85508ec6 | 39 | static DEFINE_IDR(zram_index_idr); |
6566d1a3 SS |
40 | /* idr index must be protected */ |
41 | static DEFINE_MUTEX(zram_index_mutex); | |
42 | ||
f1e3cfff | 43 | static int zram_major; |
b7ca232e | 44 | static const char *default_compressor = "lzo"; |
306b0c95 | 45 | |
306b0c95 | 46 | /* Module params (documentation at end) */ |
ca3d70bd | 47 | static unsigned int num_devices = 1; |
60f5921a SS |
48 | /* |
49 | * Pages that compress to a size equal to or greater than this are stored | |
50 | * uncompressed in memory. | |
51 | */ | |
52 | static size_t huge_class_size; | |
33863c21 | 53 | |
1f7319c7 | 54 | static void zram_free_page(struct zram *zram, size_t index); |
a939888e MK |
55 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
56 | u32 index, int offset, struct bio *bio); | |
57 | ||
1f7319c7 | 58 | |
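/* Each table entry embeds a bit-spinlock (ZRAM_LOCK) in its flags word; the helpers below serialize access to a single slot. */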
3c9959e0 MK |
59 | static int zram_slot_trylock(struct zram *zram, u32 index) |
60 | { | |
7e529283 | 61 | return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); |
3c9959e0 MK |
62 | } |
63 | ||
c4d6c4cc MK |
64 | static void zram_slot_lock(struct zram *zram, u32 index) |
65 | { | |
7e529283 | 66 | bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); |
c4d6c4cc MK |
67 | } |
68 | ||
69 | static void zram_slot_unlock(struct zram *zram, u32 index) | |
70 | { | |
7e529283 | 71 | bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); |
c4d6c4cc MK |
72 | } |
73 | ||
08eee69f | 74 | static inline bool init_done(struct zram *zram) |
be2d1d56 | 75 | { |
08eee69f | 76 | return zram->disksize; |
be2d1d56 SS |
77 | } |
78 | ||
9b3bb7ab SS |
79 | static inline struct zram *dev_to_zram(struct device *dev) |
80 | { | |
81 | return (struct zram *)dev_to_disk(dev)->private_data; | |
82 | } | |
83 | ||
643ae61d MK |
84 | static unsigned long zram_get_handle(struct zram *zram, u32 index) |
85 | { | |
86 | return zram->table[index].handle; | |
87 | } | |
88 | ||
89 | static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) | |
90 | { | |
91 | zram->table[index].handle = handle; | |
92 | } | |
93 | ||
b31177f2 | 94 | /* flag operations require the table entry's bit_spin_lock() to be held */ |
c0265342 | 95 | static bool zram_test_flag(struct zram *zram, u32 index, |
522698d7 | 96 | enum zram_pageflags flag) |
99ebbd30 | 97 | { |
7e529283 | 98 | return zram->table[index].flags & BIT(flag); |
522698d7 | 99 | } |
99ebbd30 | 100 | |
beb6602c | 101 | static void zram_set_flag(struct zram *zram, u32 index, |
522698d7 SS |
102 | enum zram_pageflags flag) |
103 | { | |
7e529283 | 104 | zram->table[index].flags |= BIT(flag); |
522698d7 | 105 | } |
99ebbd30 | 106 | |
beb6602c | 107 | static void zram_clear_flag(struct zram *zram, u32 index, |
522698d7 SS |
108 | enum zram_pageflags flag) |
109 | { | |
7e529283 | 110 | zram->table[index].flags &= ~BIT(flag); |
522698d7 | 111 | } |
99ebbd30 | 112 | |
beb6602c | 113 | static inline void zram_set_element(struct zram *zram, u32 index, |
8e19d540 | 114 | unsigned long element) |
115 | { | |
beb6602c | 116 | zram->table[index].element = element; |
8e19d540 | 117 | } |
118 | ||
643ae61d | 119 | static unsigned long zram_get_element(struct zram *zram, u32 index) |
8e19d540 | 120 | { |
643ae61d | 121 | return zram->table[index].element; |
8e19d540 | 122 | } |
123 | ||
beb6602c | 124 | static size_t zram_get_obj_size(struct zram *zram, u32 index) |
522698d7 | 125 | { |
7e529283 | 126 | return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); |
99ebbd30 AM |
127 | } |
128 | ||
beb6602c | 129 | static void zram_set_obj_size(struct zram *zram, |
522698d7 | 130 | u32 index, size_t size) |
9b3bb7ab | 131 | { |
7e529283 | 132 | unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; |
9b3bb7ab | 133 | |
7e529283 | 134 | zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; |
522698d7 SS |
135 | } |
136 | ||
a939888e MK |
137 | static inline bool zram_allocated(struct zram *zram, u32 index) |
138 | { | |
139 | return zram_get_obj_size(zram, index) || | |
140 | zram_test_flag(zram, index, ZRAM_SAME) || | |
141 | zram_test_flag(zram, index, ZRAM_WB); | |
142 | } | |
143 | ||
1f7319c7 | 144 | #if PAGE_SIZE != 4096 |
1c53e0d2 | 145 | static inline bool is_partial_io(struct bio_vec *bvec) |
522698d7 SS |
146 | { |
147 | return bvec->bv_len != PAGE_SIZE; | |
148 | } | |
1f7319c7 MK |
149 | #else |
150 | static inline bool is_partial_io(struct bio_vec *bvec) | |
151 | { | |
152 | return false; | |
153 | } | |
154 | #endif | |
522698d7 SS |
155 | |
156 | /* | |
157 | * Check if request is within bounds and aligned on zram logical blocks. | |
158 | */ | |
1c53e0d2 | 159 | static inline bool valid_io_request(struct zram *zram, |
522698d7 SS |
160 | sector_t start, unsigned int size) |
161 | { | |
162 | u64 end, bound; | |
163 | ||
164 | /* unaligned request */ | |
165 | if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) | |
1c53e0d2 | 166 | return false; |
522698d7 | 167 | if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) |
1c53e0d2 | 168 | return false; |
522698d7 SS |
169 | |
170 | end = start + (size >> SECTOR_SHIFT); | |
171 | bound = zram->disksize >> SECTOR_SHIFT; | |
172 | /* out of range */ | |
173 | if (unlikely(start >= bound || end > bound || start > end)) | |
1c53e0d2 | 174 | return false; |
522698d7 SS |
175 | |
176 | /* I/O request is valid */ | |
1c53e0d2 | 177 | return true; |
522698d7 SS |
178 | } |
179 | ||
180 | static void update_position(u32 *index, int *offset, struct bio_vec *bvec) | |
181 | { | |
e86942c7 | 182 | *index += (*offset + bvec->bv_len) / PAGE_SIZE; |
522698d7 SS |
183 | *offset = (*offset + bvec->bv_len) % PAGE_SIZE; |
184 | } | |
185 | ||
186 | static inline void update_used_max(struct zram *zram, | |
187 | const unsigned long pages) | |
188 | { | |
189 | unsigned long old_max, cur_max; | |
190 | ||
191 | old_max = atomic_long_read(&zram->stats.max_used_pages); | |
192 | ||
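/* Lock-free maximum update: retry the cmpxchg until it succeeds or another CPU has already recorded a max >= pages. */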
193 | do { | |
194 | cur_max = old_max; | |
195 | if (pages > cur_max) | |
196 | old_max = atomic_long_cmpxchg( | |
197 | &zram->stats.max_used_pages, cur_max, pages); | |
198 | } while (old_max != cur_max); | |
199 | } | |
200 | ||
48ad1abe | 201 | static inline void zram_fill_page(void *ptr, unsigned long len, |
8e19d540 | 202 | unsigned long value) |
203 | { | |
8e19d540 | 204 | WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); |
48ad1abe | 205 | memset_l(ptr, value, len / sizeof(unsigned long)); |
8e19d540 | 206 | } |
207 | ||
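/* A page whose unsigned-long words are all identical is recorded as a single "element" value instead of compressed data. */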
208 | static bool page_same_filled(void *ptr, unsigned long *element) | |
522698d7 SS |
209 | { |
210 | unsigned int pos; | |
211 | unsigned long *page; | |
f0fe9984 | 212 | unsigned long val; |
522698d7 SS |
213 | |
214 | page = (unsigned long *)ptr; | |
f0fe9984 | 215 | val = page[0]; |
522698d7 | 216 | |
f0fe9984 SP |
217 | for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) { |
218 | if (val != page[pos]) | |
1c53e0d2 | 219 | return false; |
522698d7 SS |
220 | } |
221 | ||
f0fe9984 | 222 | *element = val; |
8e19d540 | 223 | |
1c53e0d2 | 224 | return true; |
522698d7 SS |
225 | } |
226 | ||
9b3bb7ab SS |
227 | static ssize_t initstate_show(struct device *dev, |
228 | struct device_attribute *attr, char *buf) | |
229 | { | |
a68eb3b6 | 230 | u32 val; |
9b3bb7ab SS |
231 | struct zram *zram = dev_to_zram(dev); |
232 | ||
a68eb3b6 SS |
233 | down_read(&zram->init_lock); |
234 | val = init_done(zram); | |
235 | up_read(&zram->init_lock); | |
9b3bb7ab | 236 | |
56b4e8cb | 237 | return scnprintf(buf, PAGE_SIZE, "%u\n", val); |
9b3bb7ab SS |
238 | } |
239 | ||
522698d7 SS |
240 | static ssize_t disksize_show(struct device *dev, |
241 | struct device_attribute *attr, char *buf) | |
242 | { | |
243 | struct zram *zram = dev_to_zram(dev); | |
244 | ||
245 | return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); | |
246 | } | |
247 | ||
9ada9da9 MK |
248 | static ssize_t mem_limit_store(struct device *dev, |
249 | struct device_attribute *attr, const char *buf, size_t len) | |
250 | { | |
251 | u64 limit; | |
252 | char *tmp; | |
253 | struct zram *zram = dev_to_zram(dev); | |
254 | ||
255 | limit = memparse(buf, &tmp); | |
256 | if (buf == tmp) /* no chars parsed, invalid input */ | |
257 | return -EINVAL; | |
258 | ||
259 | down_write(&zram->init_lock); | |
260 | zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | |
261 | up_write(&zram->init_lock); | |
262 | ||
263 | return len; | |
264 | } | |
265 | ||
461a8eee MK |
266 | static ssize_t mem_used_max_store(struct device *dev, |
267 | struct device_attribute *attr, const char *buf, size_t len) | |
268 | { | |
269 | int err; | |
270 | unsigned long val; | |
271 | struct zram *zram = dev_to_zram(dev); | |
461a8eee MK |
272 | |
273 | err = kstrtoul(buf, 10, &val); | |
274 | if (err || val != 0) | |
275 | return -EINVAL; | |
276 | ||
277 | down_read(&zram->init_lock); | |
5a99e95b | 278 | if (init_done(zram)) { |
461a8eee | 279 | atomic_long_set(&zram->stats.max_used_pages, |
beb6602c | 280 | zs_get_total_pages(zram->mem_pool)); |
5a99e95b | 281 | } |
461a8eee MK |
282 | up_read(&zram->init_lock); |
283 | ||
284 | return len; | |
285 | } | |
286 | ||
e82592c4 MK |
287 | static ssize_t idle_store(struct device *dev, |
288 | struct device_attribute *attr, const char *buf, size_t len) | |
289 | { | |
290 | struct zram *zram = dev_to_zram(dev); | |
291 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | |
292 | int index; | |
293 | char mode_buf[8]; | |
294 | ssize_t sz; | |
295 | ||
296 | sz = strscpy(mode_buf, buf, sizeof(mode_buf)); | |
297 | if (sz <= 0) | |
298 | return -EINVAL; | |
299 | ||
300 | /* ignore trailing newline */ | |
301 | if (mode_buf[sz - 1] == '\n') | |
302 | mode_buf[sz - 1] = 0x00; | |
303 | ||
304 | if (strcmp(mode_buf, "all")) | |
305 | return -EINVAL; | |
306 | ||
307 | down_read(&zram->init_lock); | |
308 | if (!init_done(zram)) { | |
309 | up_read(&zram->init_lock); | |
310 | return -EINVAL; | |
311 | } | |
312 | ||
313 | for (index = 0; index < nr_pages; index++) { | |
a939888e MK |
314 | /* |
315 | * Do not mark a ZRAM_UNDER_WB slot as ZRAM_IDLE, to close a race.
316 | * See the comment in writeback_store. | |
317 | */ | |
e82592c4 | 318 | zram_slot_lock(zram, index); |
1d69a3f8 MK |
319 | if (zram_allocated(zram, index) && |
320 | !zram_test_flag(zram, index, ZRAM_UNDER_WB)) | |
321 | zram_set_flag(zram, index, ZRAM_IDLE); | |
e82592c4 MK |
322 | zram_slot_unlock(zram, index); |
323 | } | |
324 | ||
325 | up_read(&zram->init_lock); | |
326 | ||
327 | return len; | |
328 | } | |
329 | ||
013bf95a | 330 | #ifdef CONFIG_ZRAM_WRITEBACK |
1d69a3f8 MK |
331 | static ssize_t writeback_limit_enable_store(struct device *dev, |
332 | struct device_attribute *attr, const char *buf, size_t len) | |
333 | { | |
334 | struct zram *zram = dev_to_zram(dev); | |
335 | u64 val; | |
336 | ssize_t ret = -EINVAL; | |
337 | ||
338 | if (kstrtoull(buf, 10, &val)) | |
339 | return ret; | |
340 | ||
341 | down_read(&zram->init_lock); | |
342 | spin_lock(&zram->wb_limit_lock); | |
343 | zram->wb_limit_enable = val; | |
344 | spin_unlock(&zram->wb_limit_lock); | |
345 | up_read(&zram->init_lock); | |
346 | ret = len; | |
347 | ||
348 | return ret; | |
349 | } | |
350 | ||
351 | static ssize_t writeback_limit_enable_show(struct device *dev, | |
352 | struct device_attribute *attr, char *buf) | |
353 | { | |
354 | bool val; | |
355 | struct zram *zram = dev_to_zram(dev); | |
356 | ||
357 | down_read(&zram->init_lock); | |
358 | spin_lock(&zram->wb_limit_lock); | |
359 | val = zram->wb_limit_enable; | |
360 | spin_unlock(&zram->wb_limit_lock); | |
361 | up_read(&zram->init_lock); | |
362 | ||
363 | return scnprintf(buf, PAGE_SIZE, "%d\n", val); | |
364 | } | |
365 | ||
bb416d18 MK |
366 | static ssize_t writeback_limit_store(struct device *dev, |
367 | struct device_attribute *attr, const char *buf, size_t len) | |
368 | { | |
369 | struct zram *zram = dev_to_zram(dev); | |
370 | u64 val; | |
371 | ssize_t ret = -EINVAL; | |
372 | ||
373 | if (kstrtoull(buf, 10, &val)) | |
374 | return ret; | |
375 | ||
376 | down_read(&zram->init_lock); | |
1d69a3f8 MK |
377 | spin_lock(&zram->wb_limit_lock); |
378 | zram->bd_wb_limit = val; | |
379 | spin_unlock(&zram->wb_limit_lock); | |
bb416d18 MK |
380 | up_read(&zram->init_lock); |
381 | ret = len; | |
382 | ||
383 | return ret; | |
384 | } | |
385 | ||
386 | static ssize_t writeback_limit_show(struct device *dev, | |
387 | struct device_attribute *attr, char *buf) | |
388 | { | |
389 | u64 val; | |
390 | struct zram *zram = dev_to_zram(dev); | |
391 | ||
392 | down_read(&zram->init_lock); | |
1d69a3f8 MK |
393 | spin_lock(&zram->wb_limit_lock); |
394 | val = zram->bd_wb_limit; | |
395 | spin_unlock(&zram->wb_limit_lock); | |
bb416d18 MK |
396 | up_read(&zram->init_lock); |
397 | ||
398 | return scnprintf(buf, PAGE_SIZE, "%llu\n", val); | |
399 | } | |
400 | ||
013bf95a MK |
401 | static void reset_bdev(struct zram *zram) |
402 | { | |
403 | struct block_device *bdev; | |
404 | ||
7e529283 | 405 | if (!zram->backing_dev) |
013bf95a MK |
406 | return; |
407 | ||
408 | bdev = zram->bdev; | |
409 | if (zram->old_block_size) | |
410 | set_blocksize(bdev, zram->old_block_size); | |
411 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); | |
412 | /* hope filp_close flushes all of the IO */ | |
413 | filp_close(zram->backing_dev, NULL); | |
414 | zram->backing_dev = NULL; | |
415 | zram->old_block_size = 0; | |
416 | zram->bdev = NULL; | |
4f7a7bea MK |
417 | zram->disk->queue->backing_dev_info->capabilities |= |
418 | BDI_CAP_SYNCHRONOUS_IO; | |
1363d466 MK |
419 | kvfree(zram->bitmap); |
420 | zram->bitmap = NULL; | |
013bf95a MK |
421 | } |
422 | ||
423 | static ssize_t backing_dev_show(struct device *dev, | |
424 | struct device_attribute *attr, char *buf) | |
425 | { | |
426 | struct zram *zram = dev_to_zram(dev); | |
427 | struct file *file = zram->backing_dev; | |
428 | char *p; | |
429 | ssize_t ret; | |
430 | ||
431 | down_read(&zram->init_lock); | |
7e529283 | 432 | if (!zram->backing_dev) { |
013bf95a MK |
433 | memcpy(buf, "none\n", 5); |
434 | up_read(&zram->init_lock); | |
435 | return 5; | |
436 | } | |
437 | ||
438 | p = file_path(file, buf, PAGE_SIZE - 1); | |
439 | if (IS_ERR(p)) { | |
440 | ret = PTR_ERR(p); | |
441 | goto out; | |
442 | } | |
443 | ||
444 | ret = strlen(p); | |
445 | memmove(buf, p, ret); | |
446 | buf[ret++] = '\n'; | |
447 | out: | |
448 | up_read(&zram->init_lock); | |
449 | return ret; | |
450 | } | |
451 | ||
452 | static ssize_t backing_dev_store(struct device *dev, | |
453 | struct device_attribute *attr, const char *buf, size_t len) | |
454 | { | |
455 | char *file_name; | |
c8bd134a | 456 | size_t sz; |
013bf95a MK |
457 | struct file *backing_dev = NULL; |
458 | struct inode *inode; | |
459 | struct address_space *mapping; | |
1363d466 MK |
460 | unsigned int bitmap_sz, old_block_size = 0; |
461 | unsigned long nr_pages, *bitmap = NULL; | |
013bf95a MK |
462 | struct block_device *bdev = NULL; |
463 | int err; | |
464 | struct zram *zram = dev_to_zram(dev); | |
465 | ||
466 | file_name = kmalloc(PATH_MAX, GFP_KERNEL); | |
467 | if (!file_name) | |
468 | return -ENOMEM; | |
469 | ||
470 | down_write(&zram->init_lock); | |
471 | if (init_done(zram)) { | |
472 | pr_info("Can't setup backing device for initialized device\n"); | |
473 | err = -EBUSY; | |
474 | goto out; | |
475 | } | |
476 | ||
c8bd134a PK |
477 | strlcpy(file_name, buf, PATH_MAX); |
478 | /* ignore trailing newline */ | |
479 | sz = strlen(file_name); | |
480 | if (sz > 0 && file_name[sz - 1] == '\n') | |
481 | file_name[sz - 1] = 0x00; | |
013bf95a MK |
482 | |
483 | backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); | |
484 | if (IS_ERR(backing_dev)) { | |
485 | err = PTR_ERR(backing_dev); | |
486 | backing_dev = NULL; | |
487 | goto out; | |
488 | } | |
489 | ||
490 | mapping = backing_dev->f_mapping; | |
491 | inode = mapping->host; | |
492 | ||
493 | /* Only block devices are supported at the moment */ | |
494 | if (!S_ISBLK(inode->i_mode)) { | |
495 | err = -ENOTBLK; | |
496 | goto out; | |
497 | } | |
498 | ||
499 | bdev = bdgrab(I_BDEV(inode)); | |
500 | err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); | |
5547932d MK |
501 | if (err < 0) { |
502 | bdev = NULL; | |
013bf95a | 503 | goto out; |
5547932d | 504 | } |
013bf95a | 505 | |
1363d466 MK |
506 | nr_pages = i_size_read(inode) >> PAGE_SHIFT; |
507 | bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); | |
508 | bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); | |
509 | if (!bitmap) { | |
510 | err = -ENOMEM; | |
511 | goto out; | |
512 | } | |
513 | ||
013bf95a MK |
514 | old_block_size = block_size(bdev); |
515 | err = set_blocksize(bdev, PAGE_SIZE); | |
516 | if (err) | |
517 | goto out; | |
518 | ||
519 | reset_bdev(zram); | |
520 | ||
521 | zram->old_block_size = old_block_size; | |
522 | zram->bdev = bdev; | |
523 | zram->backing_dev = backing_dev; | |
1363d466 MK |
524 | zram->bitmap = bitmap; |
525 | zram->nr_pages = nr_pages; | |
4f7a7bea MK |
526 | /* |
527 | * With the writeback feature, zram does asynchronous IO, so it is no
528 | * longer a synchronous device; clear the synchronous io flag. Otherwise,
529 | * an upper layer (e.g., swap) could wait for IO completion instead of
530 | * submit-and-return, which would make the system sluggish.
531 | * Furthermore, when the IO function returns (e.g., swap_readpage), the
532 | * upper layer assumes the IO is done and may free the page, while the
533 | * IO is actually still in flight, which can eventually lead to a
534 | * use-after-free once the IO really completes.
535 | */ | |
536 | zram->disk->queue->backing_dev_info->capabilities &= | |
537 | ~BDI_CAP_SYNCHRONOUS_IO; | |
013bf95a MK |
538 | up_write(&zram->init_lock); |
539 | ||
540 | pr_info("setup backing device %s\n", file_name); | |
541 | kfree(file_name); | |
542 | ||
543 | return len; | |
544 | out: | |
1363d466 MK |
545 | if (bitmap) |
546 | kvfree(bitmap); | |
547 | ||
013bf95a MK |
548 | if (bdev) |
549 | blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
550 | ||
551 | if (backing_dev) | |
552 | filp_close(backing_dev, NULL); | |
553 | ||
554 | up_write(&zram->init_lock); | |
555 | ||
556 | kfree(file_name); | |
557 | ||
558 | return err; | |
559 | } | |
560 | ||
7e529283 | 561 | static unsigned long alloc_block_bdev(struct zram *zram) |
1363d466 | 562 | { |
3c9959e0 MK |
563 | unsigned long blk_idx = 1; |
564 | retry: | |
1363d466 | 565 | /* skip bit 0 so a valid block index is never confused with zram.handle == 0 */
3c9959e0 MK |
566 | blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); |
567 | if (blk_idx == zram->nr_pages) | |
1363d466 | 568 | return 0; |
1363d466 | 569 | |
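/* Another allocator may grab the same bit between the scan and the set; rescan from this index if that happens. */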
3c9959e0 MK |
570 | if (test_and_set_bit(blk_idx, zram->bitmap)) |
571 | goto retry; | |
1363d466 | 572 | |
23eddf39 | 573 | atomic64_inc(&zram->stats.bd_count); |
3c9959e0 | 574 | return blk_idx; |
1363d466 MK |
575 | } |
576 | ||
7e529283 | 577 | static void free_block_bdev(struct zram *zram, unsigned long blk_idx) |
1363d466 MK |
578 | { |
579 | int was_set; | |
580 | ||
7e529283 | 581 | was_set = test_and_clear_bit(blk_idx, zram->bitmap); |
1363d466 | 582 | WARN_ON_ONCE(!was_set); |
23eddf39 | 583 | atomic64_dec(&zram->stats.bd_count); |
1363d466 MK |
584 | } |
585 | ||
384bc41f | 586 | static void zram_page_end_io(struct bio *bio) |
db8ffbd4 | 587 | { |
263663cd | 588 | struct page *page = bio_first_page_all(bio); |
db8ffbd4 MK |
589 | |
590 | page_endio(page, op_is_write(bio_op(bio)), | |
591 | blk_status_to_errno(bio->bi_status)); | |
592 | bio_put(bio); | |
593 | } | |
594 | ||
8e654f8f MK |
595 | /* |
596 | * Returns 1 if the submission is successful. | |
597 | */ | |
598 | static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, | |
599 | unsigned long entry, struct bio *parent) | |
600 | { | |
601 | struct bio *bio; | |
602 | ||
603 | bio = bio_alloc(GFP_ATOMIC, 1); | |
604 | if (!bio) | |
605 | return -ENOMEM; | |
606 | ||
607 | bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); | |
a0725ab0 | 608 | bio_set_dev(bio, zram->bdev); |
8e654f8f MK |
609 | if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { |
610 | bio_put(bio); | |
611 | return -EIO; | |
612 | } | |
613 | ||
614 | if (!parent) { | |
615 | bio->bi_opf = REQ_OP_READ; | |
616 | bio->bi_end_io = zram_page_end_io; | |
617 | } else { | |
618 | bio->bi_opf = parent->bi_opf; | |
619 | bio_chain(bio, parent); | |
620 | } | |
621 | ||
622 | submit_bio(bio); | |
623 | return 1; | |
624 | } | |
625 | ||
1d69a3f8 MK |
626 | #define HUGE_WRITEBACK 1 |
627 | #define IDLE_WRITEBACK 2 | |
a939888e MK |
628 | |
629 | static ssize_t writeback_store(struct device *dev, | |
630 | struct device_attribute *attr, const char *buf, size_t len) | |
631 | { | |
632 | struct zram *zram = dev_to_zram(dev); | |
633 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | |
634 | unsigned long index; | |
635 | struct bio bio; | |
636 | struct bio_vec bio_vec; | |
637 | struct page *page; | |
638 | ssize_t ret, sz; | |
639 | char mode_buf[8]; | |
1d69a3f8 | 640 | int mode = -1; |
a939888e MK |
641 | unsigned long blk_idx = 0; |
642 | ||
643 | sz = strscpy(mode_buf, buf, sizeof(mode_buf)); | |
644 | if (sz <= 0) | |
645 | return -EINVAL; | |
646 | ||
647 | /* ignore trailing newline */ | |
648 | if (mode_buf[sz - 1] == '\n') | |
649 | mode_buf[sz - 1] = 0x00; | |
650 | ||
651 | if (!strcmp(mode_buf, "idle")) | |
652 | mode = IDLE_WRITEBACK; | |
653 | else if (!strcmp(mode_buf, "huge")) | |
654 | mode = HUGE_WRITEBACK; | |
655 | ||
1d69a3f8 | 656 | if (mode == -1) |
a939888e MK |
657 | return -EINVAL; |
658 | ||
659 | down_read(&zram->init_lock); | |
660 | if (!init_done(zram)) { | |
661 | ret = -EINVAL; | |
662 | goto release_init_lock; | |
663 | } | |
664 | ||
665 | if (!zram->backing_dev) { | |
666 | ret = -ENODEV; | |
667 | goto release_init_lock; | |
668 | } | |
669 | ||
670 | page = alloc_page(GFP_KERNEL); | |
671 | if (!page) { | |
672 | ret = -ENOMEM; | |
673 | goto release_init_lock; | |
674 | } | |
675 | ||
676 | for (index = 0; index < nr_pages; index++) { | |
677 | struct bio_vec bvec; | |
678 | ||
679 | bvec.bv_page = page; | |
680 | bvec.bv_len = PAGE_SIZE; | |
681 | bvec.bv_offset = 0; | |
682 | ||
1d69a3f8 MK |
683 | spin_lock(&zram->wb_limit_lock); |
684 | if (zram->wb_limit_enable && !zram->bd_wb_limit) { | |
685 | spin_unlock(&zram->wb_limit_lock); | |
bb416d18 MK |
686 | ret = -EIO; |
687 | break; | |
688 | } | |
1d69a3f8 | 689 | spin_unlock(&zram->wb_limit_lock); |
bb416d18 | 690 | |
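/* Reuse a backing-device block allocated on a previous iteration if it was not consumed (blk_idx is reset only after a successful writeback). */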
a939888e MK |
691 | if (!blk_idx) { |
692 | blk_idx = alloc_block_bdev(zram); | |
693 | if (!blk_idx) { | |
694 | ret = -ENOSPC; | |
695 | break; | |
696 | } | |
697 | } | |
698 | ||
699 | zram_slot_lock(zram, index); | |
700 | if (!zram_allocated(zram, index)) | |
701 | goto next; | |
702 | ||
703 | if (zram_test_flag(zram, index, ZRAM_WB) || | |
704 | zram_test_flag(zram, index, ZRAM_SAME) || | |
705 | zram_test_flag(zram, index, ZRAM_UNDER_WB)) | |
706 | goto next; | |
707 | ||
1d69a3f8 MK |
708 | if (mode == IDLE_WRITEBACK && |
709 | !zram_test_flag(zram, index, ZRAM_IDLE)) | |
710 | goto next; | |
711 | if (mode == HUGE_WRITEBACK && | |
712 | !zram_test_flag(zram, index, ZRAM_HUGE)) | |
a939888e MK |
713 | goto next; |
714 | /* | |
715 | * Clearing ZRAM_UNDER_WB is the caller's duty;
716 | * IOW, zram_free_page never clears it.
717 | */ | |
718 | zram_set_flag(zram, index, ZRAM_UNDER_WB); | |
719 | /* Setting ZRAM_IDLE is needed so the post-writeback race check also covers hugepage writeback */
720 | zram_set_flag(zram, index, ZRAM_IDLE); | |
721 | zram_slot_unlock(zram, index); | |
722 | if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { | |
723 | zram_slot_lock(zram, index); | |
724 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
725 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
726 | zram_slot_unlock(zram, index); | |
727 | continue; | |
728 | } | |
729 | ||
730 | bio_init(&bio, &bio_vec, 1); | |
731 | bio_set_dev(&bio, zram->bdev); | |
732 | bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); | |
733 | bio.bi_opf = REQ_OP_WRITE | REQ_SYNC; | |
734 | ||
735 | bio_add_page(&bio, bvec.bv_page, bvec.bv_len, | |
736 | bvec.bv_offset); | |
737 | /* | |
738 | * XXX: A single-page IO would be inefficient for writes,
739 | * but it is not bad as a starting point.
740 | */ | |
741 | ret = submit_bio_wait(&bio); | |
742 | if (ret) { | |
743 | zram_slot_lock(zram, index); | |
744 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
745 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
746 | zram_slot_unlock(zram, index); | |
747 | continue; | |
748 | } | |
749 | ||
23eddf39 | 750 | atomic64_inc(&zram->stats.bd_writes); |
a939888e MK |
751 | /* |
752 | * We released zram_slot_lock, so we need to check whether the slot
753 | * changed in the meantime. If the slot was freed, we can catch it
754 | * easily via zram_allocated.
755 | * A subtle case is the slot being freed, reallocated and marked as
756 | * ZRAM_IDLE again. To close that race, idle_store doesn't
757 | * mark ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
758 | * Thus, we can close the race by checking the ZRAM_IDLE bit.
759 | */ | |
760 | zram_slot_lock(zram, index); | |
761 | if (!zram_allocated(zram, index) || | |
762 | !zram_test_flag(zram, index, ZRAM_IDLE)) { | |
763 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
764 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
765 | goto next; | |
766 | } | |
767 | ||
768 | zram_free_page(zram, index); | |
769 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
770 | zram_set_flag(zram, index, ZRAM_WB); | |
771 | zram_set_element(zram, index, blk_idx); | |
772 | blk_idx = 0; | |
773 | atomic64_inc(&zram->stats.pages_stored); | |
1d69a3f8 MK |
774 | spin_lock(&zram->wb_limit_lock); |
775 | if (zram->wb_limit_enable && zram->bd_wb_limit > 0) | |
776 | zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); | |
777 | spin_unlock(&zram->wb_limit_lock); | |
a939888e MK |
778 | next: |
779 | zram_slot_unlock(zram, index); | |
780 | } | |
781 | ||
782 | if (blk_idx) | |
783 | free_block_bdev(zram, blk_idx); | |
784 | ret = len; | |
785 | __free_page(page); | |
786 | release_init_lock: | |
787 | up_read(&zram->init_lock); | |
788 | ||
789 | return ret; | |
790 | } | |
791 | ||
8e654f8f MK |
792 | struct zram_work { |
793 | struct work_struct work; | |
794 | struct zram *zram; | |
795 | unsigned long entry; | |
796 | struct bio *bio; | |
797 | }; | |
798 | ||
799 | #if PAGE_SIZE != 4096 | |
800 | static void zram_sync_read(struct work_struct *work) | |
801 | { | |
802 | struct bio_vec bvec; | |
803 | struct zram_work *zw = container_of(work, struct zram_work, work); | |
804 | struct zram *zram = zw->zram; | |
805 | unsigned long entry = zw->entry; | |
806 | struct bio *bio = zw->bio; | |
807 | ||
808 | read_from_bdev_async(zram, &bvec, entry, bio); | |
809 | } | |
810 | ||
811 | /* | |
812 | * The block layer wants one ->make_request_fn to be active at a time,
813 | * so if we use chained IO with the parent IO in the same context,
814 | * it's a deadlock. To avoid it, this uses a worker thread context.
815 | */ | |
816 | static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, | |
817 | unsigned long entry, struct bio *bio) | |
818 | { | |
819 | struct zram_work work; | |
820 | ||
821 | work.zram = zram; | |
822 | work.entry = entry; | |
823 | work.bio = bio; | |
824 | ||
825 | INIT_WORK_ONSTACK(&work.work, zram_sync_read); | |
826 | queue_work(system_unbound_wq, &work.work); | |
827 | flush_work(&work.work); | |
828 | destroy_work_on_stack(&work.work); | |
829 | ||
830 | return 1; | |
831 | } | |
832 | #else | |
833 | static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, | |
834 | unsigned long entry, struct bio *bio) | |
835 | { | |
836 | WARN_ON(1); | |
837 | return -EIO; | |
838 | } | |
839 | #endif | |
840 | ||
841 | static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, | |
842 | unsigned long entry, struct bio *parent, bool sync) | |
843 | { | |
23eddf39 | 844 | atomic64_inc(&zram->stats.bd_reads); |
8e654f8f MK |
845 | if (sync) |
846 | return read_from_bdev_sync(zram, bvec, entry, parent); | |
847 | else | |
848 | return read_from_bdev_async(zram, bvec, entry, parent); | |
849 | } | |
013bf95a | 850 | #else |
013bf95a | 851 | static inline void reset_bdev(struct zram *zram) {}; |
8e654f8f MK |
852 | static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, |
853 | unsigned long entry, struct bio *parent, bool sync) | |
854 | { | |
855 | return -EIO; | |
856 | } | |
7e529283 MK |
857 | |
858 | static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; | |
013bf95a MK |
859 | #endif |
860 | ||
c0265342 MK |
861 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING |
862 | ||
863 | static struct dentry *zram_debugfs_root; | |
864 | ||
865 | static void zram_debugfs_create(void) | |
866 | { | |
867 | zram_debugfs_root = debugfs_create_dir("zram", NULL); | |
868 | } | |
869 | ||
870 | static void zram_debugfs_destroy(void) | |
871 | { | |
872 | debugfs_remove_recursive(zram_debugfs_root); | |
873 | } | |
874 | ||
875 | static void zram_accessed(struct zram *zram, u32 index) | |
876 | { | |
e82592c4 | 877 | zram_clear_flag(zram, index, ZRAM_IDLE); |
c0265342 MK |
878 | zram->table[index].ac_time = ktime_get_boottime(); |
879 | } | |
880 | ||
c0265342 MK |
881 | static ssize_t read_block_state(struct file *file, char __user *buf, |
882 | size_t count, loff_t *ppos) | |
883 | { | |
884 | char *kbuf; | |
885 | ssize_t index, written = 0; | |
886 | struct zram *zram = file->private_data; | |
887 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | |
888 | struct timespec64 ts; | |
889 | ||
890 | kbuf = kvmalloc(count, GFP_KERNEL); | |
891 | if (!kbuf) | |
892 | return -ENOMEM; | |
893 | ||
894 | down_read(&zram->init_lock); | |
895 | if (!init_done(zram)) { | |
896 | up_read(&zram->init_lock); | |
897 | kvfree(kbuf); | |
898 | return -EINVAL; | |
899 | } | |
900 | ||
901 | for (index = *ppos; index < nr_pages; index++) { | |
902 | int copied; | |
903 | ||
904 | zram_slot_lock(zram, index); | |
905 | if (!zram_allocated(zram, index)) | |
906 | goto next; | |
907 | ||
908 | ts = ktime_to_timespec64(zram->table[index].ac_time); | |
909 | copied = snprintf(kbuf + written, count, | |
e82592c4 | 910 | "%12zd %12lld.%06lu %c%c%c%c\n", |
c0265342 MK |
911 | index, (s64)ts.tv_sec, |
912 | ts.tv_nsec / NSEC_PER_USEC, | |
913 | zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', | |
914 | zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', | |
e82592c4 MK |
915 | zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', |
916 | zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); | |
c0265342 MK |
917 | |
918 | if (count < copied) { | |
919 | zram_slot_unlock(zram, index); | |
920 | break; | |
921 | } | |
922 | written += copied; | |
923 | count -= copied; | |
924 | next: | |
925 | zram_slot_unlock(zram, index); | |
926 | *ppos += 1; | |
927 | } | |
928 | ||
929 | up_read(&zram->init_lock); | |
930 | if (copy_to_user(buf, kbuf, written)) | |
931 | written = -EFAULT; | |
932 | kvfree(kbuf); | |
933 | ||
934 | return written; | |
935 | } | |
936 | ||
937 | static const struct file_operations proc_zram_block_state_op = { | |
938 | .open = simple_open, | |
939 | .read = read_block_state, | |
940 | .llseek = default_llseek, | |
941 | }; | |
942 | ||
943 | static void zram_debugfs_register(struct zram *zram) | |
944 | { | |
945 | if (!zram_debugfs_root) | |
946 | return; | |
947 | ||
948 | zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, | |
949 | zram_debugfs_root); | |
950 | debugfs_create_file("block_state", 0400, zram->debugfs_dir, | |
951 | zram, &proc_zram_block_state_op); | |
952 | } | |
953 | ||
954 | static void zram_debugfs_unregister(struct zram *zram) | |
955 | { | |
956 | debugfs_remove_recursive(zram->debugfs_dir); | |
957 | } | |
958 | #else | |
959 | static void zram_debugfs_create(void) {}; | |
960 | static void zram_debugfs_destroy(void) {}; | |
e82592c4 MK |
961 | static void zram_accessed(struct zram *zram, u32 index) |
962 | { | |
963 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
964 | }; | |
c0265342 MK |
965 | static void zram_debugfs_register(struct zram *zram) {}; |
966 | static void zram_debugfs_unregister(struct zram *zram) {}; | |
967 | #endif | |
013bf95a | 968 | |
43209ea2 SS |
969 | /* |
970 | * We switched to per-cpu streams and this attr is not needed anymore. | |
971 | * However, we will keep it around for some time, because: | |
972 | * a) we may revert per-cpu streams in the future | |
973 | * b) it's visible to user space and we need to follow our 2-year
974 | * retirement rule; but we already have a number of 'soon to be
975 | * altered' attrs, so max_comp_streams needs to wait for the next
976 | * layoff cycle. | |
977 | */ | |
522698d7 SS |
978 | static ssize_t max_comp_streams_show(struct device *dev, |
979 | struct device_attribute *attr, char *buf) | |
980 | { | |
43209ea2 | 981 | return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); |
522698d7 SS |
982 | } |
983 | ||
beca3ec7 SS |
984 | static ssize_t max_comp_streams_store(struct device *dev, |
985 | struct device_attribute *attr, const char *buf, size_t len) | |
986 | { | |
43209ea2 | 987 | return len; |
beca3ec7 SS |
988 | } |
989 | ||
e46b8a03 SS |
990 | static ssize_t comp_algorithm_show(struct device *dev, |
991 | struct device_attribute *attr, char *buf) | |
992 | { | |
993 | size_t sz; | |
994 | struct zram *zram = dev_to_zram(dev); | |
995 | ||
996 | down_read(&zram->init_lock); | |
997 | sz = zcomp_available_show(zram->compressor, buf); | |
998 | up_read(&zram->init_lock); | |
999 | ||
1000 | return sz; | |
1001 | } | |
1002 | ||
1003 | static ssize_t comp_algorithm_store(struct device *dev, | |
1004 | struct device_attribute *attr, const char *buf, size_t len) | |
1005 | { | |
1006 | struct zram *zram = dev_to_zram(dev); | |
f357e345 | 1007 | char compressor[ARRAY_SIZE(zram->compressor)]; |
4bbacd51 SS |
1008 | size_t sz; |
1009 | ||
415403be SS |
1010 | strlcpy(compressor, buf, sizeof(compressor)); |
1011 | /* ignore trailing newline */ | |
1012 | sz = strlen(compressor); | |
1013 | if (sz > 0 && compressor[sz - 1] == '\n') | |
1014 | compressor[sz - 1] = 0x00; | |
1015 | ||
1016 | if (!zcomp_available_algorithm(compressor)) | |
1d5b43bf LH |
1017 | return -EINVAL; |
1018 | ||
e46b8a03 SS |
1019 | down_write(&zram->init_lock); |
1020 | if (init_done(zram)) { | |
1021 | up_write(&zram->init_lock); | |
1022 | pr_info("Can't change algorithm for initialized device\n"); | |
1023 | return -EBUSY; | |
1024 | } | |
4bbacd51 | 1025 | |
f357e345 | 1026 | strcpy(zram->compressor, compressor); |
e46b8a03 SS |
1027 | up_write(&zram->init_lock); |
1028 | return len; | |
1029 | } | |
1030 | ||
522698d7 SS |
1031 | static ssize_t compact_store(struct device *dev, |
1032 | struct device_attribute *attr, const char *buf, size_t len) | |
306b0c95 | 1033 | { |
522698d7 | 1034 | struct zram *zram = dev_to_zram(dev); |
306b0c95 | 1035 | |
522698d7 SS |
1036 | down_read(&zram->init_lock); |
1037 | if (!init_done(zram)) { | |
1038 | up_read(&zram->init_lock); | |
1039 | return -EINVAL; | |
1040 | } | |
306b0c95 | 1041 | |
beb6602c | 1042 | zs_compact(zram->mem_pool); |
522698d7 | 1043 | up_read(&zram->init_lock); |
d2d5e762 | 1044 | |
522698d7 | 1045 | return len; |
d2d5e762 WY |
1046 | } |
1047 | ||
522698d7 SS |
1048 | static ssize_t io_stat_show(struct device *dev, |
1049 | struct device_attribute *attr, char *buf) | |
d2d5e762 | 1050 | { |
522698d7 SS |
1051 | struct zram *zram = dev_to_zram(dev); |
1052 | ssize_t ret; | |
d2d5e762 | 1053 | |
522698d7 SS |
1054 | down_read(&zram->init_lock); |
1055 | ret = scnprintf(buf, PAGE_SIZE, | |
1056 | "%8llu %8llu %8llu %8llu\n", | |
1057 | (u64)atomic64_read(&zram->stats.failed_reads), | |
1058 | (u64)atomic64_read(&zram->stats.failed_writes), | |
1059 | (u64)atomic64_read(&zram->stats.invalid_io), | |
1060 | (u64)atomic64_read(&zram->stats.notify_free)); | |
1061 | up_read(&zram->init_lock); | |
306b0c95 | 1062 | |
522698d7 | 1063 | return ret; |
9b3bb7ab SS |
1064 | } |
1065 | ||
522698d7 SS |
1066 | static ssize_t mm_stat_show(struct device *dev, |
1067 | struct device_attribute *attr, char *buf) | |
9b3bb7ab | 1068 | { |
522698d7 | 1069 | struct zram *zram = dev_to_zram(dev); |
7d3f3938 | 1070 | struct zs_pool_stats pool_stats; |
522698d7 SS |
1071 | u64 orig_size, mem_used = 0; |
1072 | long max_used; | |
1073 | ssize_t ret; | |
a539c72a | 1074 | |
7d3f3938 SS |
1075 | memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); |
1076 | ||
522698d7 | 1077 | down_read(&zram->init_lock); |
7d3f3938 | 1078 | if (init_done(zram)) { |
beb6602c MK |
1079 | mem_used = zs_get_total_pages(zram->mem_pool); |
1080 | zs_pool_stats(zram->mem_pool, &pool_stats); | |
7d3f3938 | 1081 | } |
9b3bb7ab | 1082 | |
522698d7 SS |
1083 | orig_size = atomic64_read(&zram->stats.pages_stored); |
1084 | max_used = atomic_long_read(&zram->stats.max_used_pages); | |
9b3bb7ab | 1085 | |
522698d7 | 1086 | ret = scnprintf(buf, PAGE_SIZE, |
89e85bce | 1087 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n", |
522698d7 SS |
1088 | orig_size << PAGE_SHIFT, |
1089 | (u64)atomic64_read(&zram->stats.compr_data_size), | |
1090 | mem_used << PAGE_SHIFT, | |
1091 | zram->limit_pages << PAGE_SHIFT, | |
1092 | max_used << PAGE_SHIFT, | |
8e19d540 | 1093 | (u64)atomic64_read(&zram->stats.same_pages), |
89e85bce MK |
1094 | pool_stats.pages_compacted, |
1095 | (u64)atomic64_read(&zram->stats.huge_pages)); | |
522698d7 | 1096 | up_read(&zram->init_lock); |
9b3bb7ab | 1097 | |
522698d7 SS |
1098 | return ret; |
1099 | } | |
1100 | ||
23eddf39 | 1101 | #ifdef CONFIG_ZRAM_WRITEBACK |
bb416d18 | 1102 | #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) |
23eddf39 MK |
1103 | static ssize_t bd_stat_show(struct device *dev, |
1104 | struct device_attribute *attr, char *buf) | |
1105 | { | |
1106 | struct zram *zram = dev_to_zram(dev); | |
1107 | ssize_t ret; | |
1108 | ||
1109 | down_read(&zram->init_lock); | |
1110 | ret = scnprintf(buf, PAGE_SIZE, | |
1111 | "%8llu %8llu %8llu\n", | |
bb416d18 MK |
1112 | FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), |
1113 | FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), | |
1114 | FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); | |
23eddf39 MK |
1115 | up_read(&zram->init_lock); |
1116 | ||
1117 | return ret; | |
1118 | } | |
1119 | #endif | |
1120 | ||
623e47fc SS |
1121 | static ssize_t debug_stat_show(struct device *dev, |
1122 | struct device_attribute *attr, char *buf) | |
1123 | { | |
1124 | int version = 1; | |
1125 | struct zram *zram = dev_to_zram(dev); | |
1126 | ssize_t ret; | |
1127 | ||
1128 | down_read(&zram->init_lock); | |
1129 | ret = scnprintf(buf, PAGE_SIZE, | |
3c9959e0 | 1130 | "version: %d\n%8llu %8llu\n", |
623e47fc | 1131 | version, |
3c9959e0 MK |
1132 | (u64)atomic64_read(&zram->stats.writestall), |
1133 | (u64)atomic64_read(&zram->stats.miss_free)); | |
623e47fc SS |
1134 | up_read(&zram->init_lock); |
1135 | ||
1136 | return ret; | |
1137 | } | |
1138 | ||
522698d7 SS |
1139 | static DEVICE_ATTR_RO(io_stat); |
1140 | static DEVICE_ATTR_RO(mm_stat); | |
23eddf39 MK |
1141 | #ifdef CONFIG_ZRAM_WRITEBACK |
1142 | static DEVICE_ATTR_RO(bd_stat); | |
1143 | #endif | |
623e47fc | 1144 | static DEVICE_ATTR_RO(debug_stat); |
522698d7 | 1145 | |
beb6602c | 1146 | static void zram_meta_free(struct zram *zram, u64 disksize) |
522698d7 SS |
1147 | { |
1148 | size_t num_pages = disksize >> PAGE_SHIFT; | |
1149 | size_t index; | |
1fec1172 GM |
1150 | |
1151 | /* Free all pages that are still in this zram device */ | |
302128dc MK |
1152 | for (index = 0; index < num_pages; index++) |
1153 | zram_free_page(zram, index); | |
1fec1172 | 1154 | |
beb6602c MK |
1155 | zs_destroy_pool(zram->mem_pool); |
1156 | vfree(zram->table); | |
9b3bb7ab SS |
1157 | } |
1158 | ||
beb6602c | 1159 | static bool zram_meta_alloc(struct zram *zram, u64 disksize) |
9b3bb7ab SS |
1160 | { |
1161 | size_t num_pages; | |
9b3bb7ab | 1162 | |
9b3bb7ab | 1163 | num_pages = disksize >> PAGE_SHIFT; |
fad953ce | 1164 | zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); |
beb6602c MK |
1165 | if (!zram->table) |
1166 | return false; | |
9b3bb7ab | 1167 | |
beb6602c MK |
1168 | zram->mem_pool = zs_create_pool(zram->disk->disk_name); |
1169 | if (!zram->mem_pool) { | |
1170 | vfree(zram->table); | |
1171 | return false; | |
9b3bb7ab SS |
1172 | } |
1173 | ||
60f5921a SS |
1174 | if (!huge_class_size) |
1175 | huge_class_size = zs_huge_class_size(zram->mem_pool); | |
beb6602c | 1176 | return true; |
9b3bb7ab SS |
1177 | } |
1178 | ||
d2d5e762 WY |
1179 | /* |
1180 | * To protect concurrent access to the same index entry, | |
1181 | * the caller should hold this table entry's bit_spinlock to
1182 | * indicate that this index entry is being accessed.
1183 | */ | |
f1e3cfff | 1184 | static void zram_free_page(struct zram *zram, size_t index) |
306b0c95 | 1185 | { |
db8ffbd4 MK |
1186 | unsigned long handle; |
1187 | ||
7e529283 MK |
1188 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING |
1189 | zram->table[index].ac_time = 0; | |
1190 | #endif | |
e82592c4 MK |
1191 | if (zram_test_flag(zram, index, ZRAM_IDLE)) |
1192 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
1193 | ||
89e85bce MK |
1194 | if (zram_test_flag(zram, index, ZRAM_HUGE)) { |
1195 | zram_clear_flag(zram, index, ZRAM_HUGE); | |
1196 | atomic64_dec(&zram->stats.huge_pages); | |
1197 | } | |
1198 | ||
7e529283 MK |
1199 | if (zram_test_flag(zram, index, ZRAM_WB)) { |
1200 | zram_clear_flag(zram, index, ZRAM_WB); | |
1201 | free_block_bdev(zram, zram_get_element(zram, index)); | |
1202 | goto out; | |
db8ffbd4 | 1203 | } |
306b0c95 | 1204 | |
8e19d540 | 1205 | /* |
1206 | * No memory is allocated for same-element-filled pages;
1207 | * simply clear the ZRAM_SAME flag.
1208 | */ | |
beb6602c MK |
1209 | if (zram_test_flag(zram, index, ZRAM_SAME)) { |
1210 | zram_clear_flag(zram, index, ZRAM_SAME); | |
8e19d540 | 1211 | atomic64_dec(&zram->stats.same_pages); |
7e529283 | 1212 | goto out; |
306b0c95 NG |
1213 | } |
1214 | ||
db8ffbd4 | 1215 | handle = zram_get_handle(zram, index); |
8e19d540 | 1216 | if (!handle) |
1217 | return; | |
1218 | ||
beb6602c | 1219 | zs_free(zram->mem_pool, handle); |
306b0c95 | 1220 | |
beb6602c | 1221 | atomic64_sub(zram_get_obj_size(zram, index), |
d2d5e762 | 1222 | &zram->stats.compr_data_size); |
7e529283 | 1223 | out: |
90a7806e | 1224 | atomic64_dec(&zram->stats.pages_stored); |
643ae61d | 1225 | zram_set_handle(zram, index, 0); |
beb6602c | 1226 | zram_set_obj_size(zram, index, 0); |
a939888e MK |
1227 | WARN_ON_ONCE(zram->table[index].flags & |
1228 | ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); | |
306b0c95 NG |
1229 | } |
1230 | ||
8e654f8f MK |
1231 | static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, |
1232 | struct bio *bio, bool partial_io) | |
306b0c95 | 1233 | { |
1f7319c7 | 1234 | int ret; |
92967471 | 1235 | unsigned long handle; |
ebaf9ab5 | 1236 | unsigned int size; |
1f7319c7 | 1237 | void *src, *dst; |
1f7319c7 | 1238 | |
7e529283 MK |
1239 | zram_slot_lock(zram, index); |
1240 | if (zram_test_flag(zram, index, ZRAM_WB)) { | |
1241 | struct bio_vec bvec; | |
8e654f8f | 1242 | |
8e654f8f | 1243 | zram_slot_unlock(zram, index); |
7e529283 MK |
1244 | |
1245 | bvec.bv_page = page; | |
1246 | bvec.bv_len = PAGE_SIZE; | |
1247 | bvec.bv_offset = 0; | |
1248 | return read_from_bdev(zram, &bvec, | |
1249 | zram_get_element(zram, index), | |
1250 | bio, partial_io); | |
8e654f8f MK |
1251 | } |
1252 | ||
643ae61d | 1253 | handle = zram_get_handle(zram, index); |
ae94264e MK |
1254 | if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { |
1255 | unsigned long value; | |
1256 | void *mem; | |
1257 | ||
1258 | value = handle ? zram_get_element(zram, index) : 0; | |
1259 | mem = kmap_atomic(page); | |
1260 | zram_fill_page(mem, PAGE_SIZE, value); | |
1261 | kunmap_atomic(mem); | |
1262 | zram_slot_unlock(zram, index); | |
1263 | return 0; | |
1264 | } | |
1265 | ||
beb6602c | 1266 | size = zram_get_obj_size(zram, index); |
306b0c95 | 1267 | |
beb6602c | 1268 | src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); |
ebaf9ab5 | 1269 | if (size == PAGE_SIZE) { |
1f7319c7 MK |
1270 | dst = kmap_atomic(page); |
1271 | memcpy(dst, src, PAGE_SIZE); | |
1272 | kunmap_atomic(dst); | |
1273 | ret = 0; | |
ebaf9ab5 SS |
1274 | } else { |
1275 | struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
1276 | ||
1f7319c7 MK |
1277 | dst = kmap_atomic(page); |
1278 | ret = zcomp_decompress(zstrm, src, size, dst); | |
1279 | kunmap_atomic(dst); | |
ebaf9ab5 SS |
1280 | zcomp_stream_put(zram->comp); |
1281 | } | |
beb6602c | 1282 | zs_unmap_object(zram->mem_pool, handle); |
86c49814 | 1283 | zram_slot_unlock(zram, index); |
a1dd52af | 1284 | |
8c921b2b | 1285 | /* Should NEVER happen. Return bio error if it does. */ |
1f7319c7 | 1286 | if (unlikely(ret)) |
8c921b2b | 1287 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); |
306b0c95 | 1288 | |
1f7319c7 | 1289 | return ret; |
306b0c95 NG |
1290 | } |
1291 | ||
37b51fdd | 1292 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
8e654f8f | 1293 | u32 index, int offset, struct bio *bio) |
924bd88d JM |
1294 | { |
1295 | int ret; | |
37b51fdd | 1296 | struct page *page; |
37b51fdd | 1297 | |
1f7319c7 MK |
1298 | page = bvec->bv_page; |
1299 | if (is_partial_io(bvec)) { | |
1300 | /* Use a temporary buffer to decompress the page */ | |
1301 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
1302 | if (!page) | |
1303 | return -ENOMEM; | |
924bd88d JM |
1304 | } |
1305 | ||
8e654f8f | 1306 | ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); |
1f7319c7 MK |
1307 | if (unlikely(ret)) |
1308 | goto out; | |
7e5a5104 | 1309 | |
1f7319c7 MK |
1310 | if (is_partial_io(bvec)) { |
1311 | void *dst = kmap_atomic(bvec->bv_page); | |
1312 | void *src = kmap_atomic(page); | |
37b51fdd | 1313 | |
1f7319c7 MK |
1314 | memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); |
1315 | kunmap_atomic(src); | |
1316 | kunmap_atomic(dst); | |
37b51fdd | 1317 | } |
1f7319c7 | 1318 | out: |
37b51fdd | 1319 | if (is_partial_io(bvec)) |
1f7319c7 | 1320 | __free_page(page); |
37b51fdd | 1321 | |
37b51fdd | 1322 | return ret; |
924bd88d JM |
1323 | } |
1324 | ||
db8ffbd4 MK |
1325 | static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, |
1326 | u32 index, struct bio *bio) | |
306b0c95 | 1327 | { |
ae85a807 | 1328 | int ret = 0; |
1f7319c7 | 1329 | unsigned long alloced_pages; |
da9556a2 | 1330 | unsigned long handle = 0; |
97ec7c8b MK |
1331 | unsigned int comp_len = 0; |
1332 | void *src, *dst, *mem; | |
1333 | struct zcomp_strm *zstrm; | |
1334 | struct page *page = bvec->bv_page; | |
1335 | unsigned long element = 0; | |
1336 | enum zram_pageflags flags = 0; | |
1337 | ||
1338 | mem = kmap_atomic(page); | |
1339 | if (page_same_filled(mem, &element)) { | |
1340 | kunmap_atomic(mem); | |
1341 | /* Free memory associated with this sector now. */ | |
1342 | flags = ZRAM_SAME; | |
1343 | atomic64_inc(&zram->stats.same_pages); | |
1344 | goto out; | |
1345 | } | |
1346 | kunmap_atomic(mem); | |
924bd88d | 1347 | |
da9556a2 | 1348 | compress_again: |
97ec7c8b | 1349 | zstrm = zcomp_stream_get(zram->comp); |
1f7319c7 | 1350 | src = kmap_atomic(page); |
97ec7c8b | 1351 | ret = zcomp_compress(zstrm, src, &comp_len); |
1f7319c7 | 1352 | kunmap_atomic(src); |
306b0c95 | 1353 | |
b7ca232e | 1354 | if (unlikely(ret)) { |
97ec7c8b | 1355 | zcomp_stream_put(zram->comp); |
8c921b2b | 1356 | pr_err("Compression failed! err=%d\n", ret); |
97ec7c8b | 1357 | zs_free(zram->mem_pool, handle); |
1f7319c7 | 1358 | return ret; |
8c921b2b | 1359 | } |
da9556a2 | 1360 | |
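/* Objects that would fall into zsmalloc's huge size class gain little from compression; store them as full pages and flag them ZRAM_HUGE below. */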
a939888e | 1361 | if (comp_len >= huge_class_size) |
89e85bce | 1362 | comp_len = PAGE_SIZE; |
da9556a2 SS |
1363 | /* |
1364 | * handle allocation has 2 paths: | |
1365 | * a) fast path is executed with preemption disabled (for | |
1366 | * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, | |
1367 | * since we can't sleep; | |
1368 | * b) slow path enables preemption and attempts to allocate | |
1369 | * the page with __GFP_DIRECT_RECLAIM bit set. we have to | |
1370 | * put per-cpu compression stream and, thus, to re-do | |
1371 | * the compression once handle is allocated. | |
1372 | * | |
1373 | * if we have a 'non-null' handle here then we are coming | |
1374 | * from the slow path and handle has already been allocated. | |
1375 | */ | |
1376 | if (!handle) | |
beb6602c | 1377 | handle = zs_malloc(zram->mem_pool, comp_len, |
da9556a2 SS |
1378 | __GFP_KSWAPD_RECLAIM | |
1379 | __GFP_NOWARN | | |
9bc482d3 MK |
1380 | __GFP_HIGHMEM | |
1381 | __GFP_MOVABLE); | |
fd1a30de | 1382 | if (!handle) { |
2aea8493 | 1383 | zcomp_stream_put(zram->comp); |
623e47fc | 1384 | atomic64_inc(&zram->stats.writestall); |
beb6602c | 1385 | handle = zs_malloc(zram->mem_pool, comp_len, |
9bc482d3 MK |
1386 | GFP_NOIO | __GFP_HIGHMEM | |
1387 | __GFP_MOVABLE); | |
da9556a2 SS |
1388 | if (handle) |
1389 | goto compress_again; | |
1f7319c7 | 1390 | return -ENOMEM; |
8c921b2b | 1391 | } |
9ada9da9 | 1392 | |
beb6602c | 1393 | alloced_pages = zs_get_total_pages(zram->mem_pool); |
12372755 SS |
1394 | update_used_max(zram, alloced_pages); |
1395 | ||
461a8eee | 1396 | if (zram->limit_pages && alloced_pages > zram->limit_pages) { |
97ec7c8b | 1397 | zcomp_stream_put(zram->comp); |
beb6602c | 1398 | zs_free(zram->mem_pool, handle); |
1f7319c7 MK |
1399 | return -ENOMEM; |
1400 | } | |
1401 | ||
beb6602c | 1402 | dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); |
1f7319c7 MK |
1403 | |
1404 | src = zstrm->buffer; | |
1405 | if (comp_len == PAGE_SIZE) | |
397c6066 | 1406 | src = kmap_atomic(page); |
1f7319c7 MK |
1407 | memcpy(dst, src, comp_len); |
1408 | if (comp_len == PAGE_SIZE) | |
397c6066 | 1409 | kunmap_atomic(src); |
306b0c95 | 1410 | |
2aea8493 | 1411 | zcomp_stream_put(zram->comp); |
beb6602c | 1412 | zs_unmap_object(zram->mem_pool, handle); |
4ebbe7f7 MK |
1413 | atomic64_add(comp_len, &zram->stats.compr_data_size); |
1414 | out: | |
f40ac2ae SS |
1415 | /* |
1416 | * Free memory associated with this sector | |
1417 | * before overwriting unused sectors. | |
1418 | */ | |
86c49814 | 1419 | zram_slot_lock(zram, index); |
f40ac2ae | 1420 | zram_free_page(zram, index); |
db8ffbd4 | 1421 | |
89e85bce MK |
1422 | if (comp_len == PAGE_SIZE) { |
1423 | zram_set_flag(zram, index, ZRAM_HUGE); | |
1424 | atomic64_inc(&zram->stats.huge_pages); | |
1425 | } | |
1426 | ||
db8ffbd4 MK |
1427 | if (flags) { |
1428 | zram_set_flag(zram, index, flags); | |
4ebbe7f7 | 1429 | zram_set_element(zram, index, element); |
db8ffbd4 | 1430 | } else { |
4ebbe7f7 MK |
1431 | zram_set_handle(zram, index, handle); |
1432 | zram_set_obj_size(zram, index, comp_len); | |
1433 | } | |
86c49814 | 1434 | zram_slot_unlock(zram, index); |
306b0c95 | 1435 | |
8c921b2b | 1436 | /* Update stats */ |
90a7806e | 1437 | atomic64_inc(&zram->stats.pages_stored); |
ae85a807 | 1438 | return ret; |
1f7319c7 MK |
1439 | } |
1440 | ||
1441 | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, | |
db8ffbd4 | 1442 | u32 index, int offset, struct bio *bio) |
1f7319c7 MK |
1443 | { |
1444 | int ret; | |
1445 | struct page *page = NULL; | |
1446 | void *src; | |
1447 | struct bio_vec vec; | |
1448 | ||
1449 | vec = *bvec; | |
1450 | if (is_partial_io(bvec)) { | |
1451 | void *dst; | |
1452 | /* | |
1453 | * This is a partial IO. We need to read the full page | |
1454 | * before writing the changes.
1455 | */ | |
1456 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
1457 | if (!page) | |
1458 | return -ENOMEM; | |
1459 | ||
8e654f8f | 1460 | ret = __zram_bvec_read(zram, page, index, bio, true); |
1f7319c7 MK |
1461 | if (ret) |
1462 | goto out; | |
1463 | ||
1464 | src = kmap_atomic(bvec->bv_page); | |
1465 | dst = kmap_atomic(page); | |
1466 | memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); | |
1467 | kunmap_atomic(dst); | |
1468 | kunmap_atomic(src); | |
1469 | ||
1470 | vec.bv_page = page; | |
1471 | vec.bv_len = PAGE_SIZE; | |
1472 | vec.bv_offset = 0; | |
1473 | } | |
1474 | ||
db8ffbd4 | 1475 | ret = __zram_bvec_write(zram, &vec, index, bio); |
924bd88d | 1476 | out: |
397c6066 | 1477 | if (is_partial_io(bvec)) |
1f7319c7 | 1478 | __free_page(page); |
924bd88d | 1479 | return ret; |
8c921b2b JM |
1480 | } |
1481 | ||
f4659d8e JK |
1482 | /* |
1483 | * zram_bio_discard - handler on discard request | |
1484 | * @index: physical block index in PAGE_SIZE units | |
1485 | * @offset: byte offset within physical block | |
1486 | */ | |
1487 | static void zram_bio_discard(struct zram *zram, u32 index, | |
1488 | int offset, struct bio *bio) | |
1489 | { | |
1490 | size_t n = bio->bi_iter.bi_size; | |
1491 | ||
1492 | /* | |
1493 | * zram manages data in physical block size units. Because logical block | |
1494 | * size isn't identical to the physical block size on some architectures, we
1495 | * could get a discard request pointing to a specific offset within a | |
1496 | * certain physical block. Although we can handle this request by | |
1497 | * reading that physical block and decompressing and partially zeroing
1498 | * and re-compressing and then re-storing it, this isn't reasonable | |
1499 | * because our intent with a discard request is to save memory. So | |
1500 | * skipping this logical block is appropriate here. | |
1501 | */ | |
1502 | if (offset) { | |
38515c73 | 1503 | if (n <= (PAGE_SIZE - offset)) |
f4659d8e JK |
1504 | return; |
1505 | ||
38515c73 | 1506 | n -= (PAGE_SIZE - offset); |
f4659d8e JK |
1507 | index++; |
1508 | } | |
1509 | ||
1510 | while (n >= PAGE_SIZE) { | |
86c49814 | 1511 | zram_slot_lock(zram, index); |
f4659d8e | 1512 | zram_free_page(zram, index); |
86c49814 | 1513 | zram_slot_unlock(zram, index); |
015254da | 1514 | atomic64_inc(&zram->stats.notify_free); |
f4659d8e JK |
1515 | index++; |
1516 | n -= PAGE_SIZE; | |
1517 | } | |
1518 | } | |
1519 | ||
ae85a807 MK |
1520 | /* |
1521 | * Returns a negative errno if there is a problem. Otherwise returns 0 or 1.
1522 | * Returns 0 if the IO request was completed synchronously.
1523 | * Returns 1 if the IO request was successfully submitted.
1524 | */ | |
522698d7 | 1525 | static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, |
3f289dcb | 1526 | int offset, unsigned int op, struct bio *bio) |
9b3bb7ab | 1527 | { |
522698d7 | 1528 | unsigned long start_time = jiffies; |
d62e26b3 | 1529 | struct request_queue *q = zram->disk->queue; |
9b3bb7ab | 1530 | int ret; |
9b3bb7ab | 1531 | |
ddcf35d3 | 1532 | generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT, |
522698d7 | 1533 | &zram->disk->part0); |
46a51c80 | 1534 | |
3f289dcb | 1535 | if (!op_is_write(op)) { |
522698d7 | 1536 | atomic64_inc(&zram->stats.num_reads); |
8e654f8f | 1537 | ret = zram_bvec_read(zram, bvec, index, offset, bio); |
1f7319c7 | 1538 | flush_dcache_page(bvec->bv_page); |
522698d7 SS |
1539 | } else { |
1540 | atomic64_inc(&zram->stats.num_writes); | |
db8ffbd4 | 1541 | ret = zram_bvec_write(zram, bvec, index, offset, bio); |
1b672224 | 1542 | } |
9b3bb7ab | 1543 | |
ddcf35d3 | 1544 | generic_end_io_acct(q, op, &zram->disk->part0, start_time); |
9b3bb7ab | 1545 | |
d7eac6b6 MK |
1546 | zram_slot_lock(zram, index); |
1547 | zram_accessed(zram, index); | |
1548 | zram_slot_unlock(zram, index); | |
1549 | ||
ae85a807 | 1550 | if (unlikely(ret < 0)) { |
3f289dcb | 1551 | if (!op_is_write(op)) |
522698d7 SS |
1552 | atomic64_inc(&zram->stats.failed_reads); |
1553 | else | |
1554 | atomic64_inc(&zram->stats.failed_writes); | |
1b672224 | 1555 | } |
9b3bb7ab | 1556 | |
1b672224 | 1557 | return ret; |
8c921b2b JM |
1558 | } |
1559 | ||
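/*
 * Walk the bio segment by segment, splitting any segment that crosses a
 * PAGE_SIZE boundary, and pass each piece to zram_bvec_rw(). Discard and
 * write-zeroes requests are completed via zram_bio_discard() instead.
 */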
be257c61 | 1560 | static void __zram_make_request(struct zram *zram, struct bio *bio) |
8c921b2b | 1561 | { |
abf54548 | 1562 | int offset; |
8c921b2b | 1563 | u32 index; |
7988613b KO |
1564 | struct bio_vec bvec; |
1565 | struct bvec_iter iter; | |
8c921b2b | 1566 | |
4f024f37 KO |
1567 | index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; |
1568 | offset = (bio->bi_iter.bi_sector & | |
1569 | (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; | |
8c921b2b | 1570 | |
31edeacd CH |
1571 | switch (bio_op(bio)) { |
1572 | case REQ_OP_DISCARD: | |
1573 | case REQ_OP_WRITE_ZEROES: | |
f4659d8e | 1574 | zram_bio_discard(zram, index, offset, bio); |
4246a0b6 | 1575 | bio_endio(bio); |
f4659d8e | 1576 | return; |
31edeacd CH |
1577 | default: |
1578 | break; | |
f4659d8e JK |
1579 | } |
1580 | ||
7988613b | 1581 | bio_for_each_segment(bvec, bio, iter) { |
e86942c7 MK |
1582 | struct bio_vec bv = bvec; |
1583 | unsigned int unwritten = bvec.bv_len; | |
924bd88d | 1584 | |
e86942c7 MK |
1585 | do { |
1586 | bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, | |
1587 | unwritten); | |
abf54548 | 1588 | if (zram_bvec_rw(zram, &bv, index, offset, |
3f289dcb | 1589 | bio_op(bio), bio) < 0) |
924bd88d JM |
1590 | goto out; |
1591 | ||
e86942c7 MK |
1592 | bv.bv_offset += bv.bv_len; |
1593 | unwritten -= bv.bv_len; | |
924bd88d | 1594 | |
e86942c7 MK |
1595 | update_position(&index, &offset, &bv); |
1596 | } while (unwritten); | |
a1dd52af | 1597 | } |
306b0c95 | 1598 | |
4246a0b6 | 1599 | bio_endio(bio); |
7d7854b4 | 1600 | return; |
306b0c95 NG |
1601 | |
1602 | out: | |
306b0c95 | 1603 | bio_io_error(bio); |
306b0c95 NG |
1604 | } |
1605 | ||
306b0c95 | 1606 | /* |
f1e3cfff | 1607 | * Handler function for all zram I/O requests. |
306b0c95 | 1608 | */ |
dece1635 | 1609 | static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) |
306b0c95 | 1610 | { |
f1e3cfff | 1611 | struct zram *zram = queue->queuedata; |
306b0c95 | 1612 | |
54850e73 | 1613 | if (!valid_io_request(zram, bio->bi_iter.bi_sector, |
1614 | bio->bi_iter.bi_size)) { | |
da5cc7d3 | 1615 | atomic64_inc(&zram->stats.invalid_io); |
a09759ac | 1616 | goto error; |
6642a67c JM |
1617 | } |
1618 | ||
be257c61 | 1619 | __zram_make_request(zram, bio); |
dece1635 | 1620 | return BLK_QC_T_NONE; |
a09759ac | 1621 | |
0900beae JM |
1622 | error: |
1623 | bio_io_error(bio); | |
dece1635 | 1624 | return BLK_QC_T_NONE; |
306b0c95 NG |
1625 | } |
1626 | ||
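/*
 * Called when a swap slot on this device is freed so that the compressed
 * copy can be released immediately. The slot lock is only tried; if it is
 * contended, the free is skipped and accounted in the miss_free counter.
 */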
2ccbec05 NG |
1627 | static void zram_slot_free_notify(struct block_device *bdev, |
1628 | unsigned long index) | |
107c161b | 1629 | { |
f1e3cfff | 1630 | struct zram *zram; |
107c161b | 1631 | |
f1e3cfff | 1632 | zram = bdev->bd_disk->private_data; |
a0c516cb | 1633 | |
3c9959e0 MK |
1634 | atomic64_inc(&zram->stats.notify_free); |
1635 | if (!zram_slot_trylock(zram, index)) { | |
1636 | atomic64_inc(&zram->stats.miss_free); | |
1637 | return; | |
1638 | } | |
1639 | ||
f614a9f4 | 1640 | zram_free_page(zram, index); |
86c49814 | 1641 | zram_slot_unlock(zram, index); |
107c161b NG |
1642 | } |
1643 | ||
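/*
 * rw_page entry point: synchronous, page-sized read/write without a bio.
 * Transparent huge pages are rejected, and page_endio() is called only
 * when the request completed synchronously (zram_bvec_rw() returned 0).
 */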
8c7f0102 | 1644 | static int zram_rw_page(struct block_device *bdev, sector_t sector, |
3f289dcb | 1645 | struct page *page, unsigned int op) |
8c7f0102 | 1646 | { |
ae85a807 | 1647 | int offset, ret; |
8c7f0102 | 1648 | u32 index; |
1649 | struct zram *zram; | |
1650 | struct bio_vec bv; | |
1651 | ||
98cc093c HY |
1652 | if (PageTransHuge(page)) |
1653 | return -ENOTSUPP; | |
8c7f0102 | 1654 | zram = bdev->bd_disk->private_data; |
08eee69f | 1655 | |
8c7f0102 | 1656 | if (!valid_io_request(zram, sector, PAGE_SIZE)) { |
1657 | atomic64_inc(&zram->stats.invalid_io); | |
ae85a807 | 1658 | ret = -EINVAL; |
a09759ac | 1659 | goto out; |
8c7f0102 | 1660 | } |
1661 | ||
1662 | index = sector >> SECTORS_PER_PAGE_SHIFT; | |
4ca82dab | 1663 | offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; |
8c7f0102 | 1664 | |
1665 | bv.bv_page = page; | |
1666 | bv.bv_len = PAGE_SIZE; | |
1667 | bv.bv_offset = 0; | |
1668 | ||
3f289dcb | 1669 | ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); |
08eee69f | 1670 | out: |
8c7f0102 | 1671 | /* |
1672 | * If the I/O fails, just return the error (i.e., non-zero) without |
1673 | * calling page_endio. |
1674 | * This makes the callers of rw_page (e.g., swap_readpage, |
1675 | * __swap_writepage) resubmit the I/O as a bio request, and |
1676 | * bio->bi_end_io then handles the error |
1677 | * (e.g., SetPageError, set_page_dirty and other work). |
1678 | */ | |
ae85a807 MK |
1679 | if (unlikely(ret < 0)) |
1680 | return ret; | |
1681 | ||
1682 | switch (ret) { | |
1683 | case 0: | |
3f289dcb | 1684 | page_endio(page, op_is_write(op), 0); |
ae85a807 MK |
1685 | break; |
1686 | case 1: | |
1687 | ret = 0; | |
1688 | break; | |
1689 | default: | |
1690 | WARN_ON(1); | |
1691 | } | |
1692 | return ret; | |
8c7f0102 | 1693 | } |
1694 | ||
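/*
 * Tear down an initialized device: clear the memory limit, free the
 * metadata table, zero the statistics, destroy the compression backend
 * and detach any writeback backing device.
 */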
522698d7 SS |
1695 | static void zram_reset_device(struct zram *zram) |
1696 | { | |
522698d7 SS |
1697 | struct zcomp *comp; |
1698 | u64 disksize; | |
306b0c95 | 1699 | |
522698d7 | 1700 | down_write(&zram->init_lock); |
9b3bb7ab | 1701 | |
522698d7 SS |
1702 | zram->limit_pages = 0; |
1703 | ||
1704 | if (!init_done(zram)) { | |
1705 | up_write(&zram->init_lock); | |
1706 | return; | |
1707 | } | |
1708 | ||
522698d7 SS |
1709 | comp = zram->comp; |
1710 | disksize = zram->disksize; | |
522698d7 | 1711 | zram->disksize = 0; |
522698d7 SS |
1712 | |
1713 | set_capacity(zram->disk, 0); | |
1714 | part_stat_set_all(&zram->disk->part0, 0); | |
1715 | ||
1716 | up_write(&zram->init_lock); | |
1717 | /* I/O operations on all CPUs are done, so it is safe to free */ |
beb6602c | 1718 | zram_meta_free(zram, disksize); |
302128dc | 1719 | memset(&zram->stats, 0, sizeof(zram->stats)); |
522698d7 | 1720 | zcomp_destroy(comp); |
013bf95a | 1721 | reset_bdev(zram); |
522698d7 SS |
1722 | } |
1723 | ||
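/*
 * Writing a size (in memparse format) to the disksize attribute
 * initializes the device: allocate the metadata table, create the
 * compression backend and set the gendisk capacity. Fails with -EBUSY
 * if the device is already initialized.
 */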
1724 | static ssize_t disksize_store(struct device *dev, | |
1725 | struct device_attribute *attr, const char *buf, size_t len) | |
2f6a3bed | 1726 | { |
522698d7 SS |
1727 | u64 disksize; |
1728 | struct zcomp *comp; | |
2f6a3bed | 1729 | struct zram *zram = dev_to_zram(dev); |
522698d7 | 1730 | int err; |
2f6a3bed | 1731 | |
522698d7 SS |
1732 | disksize = memparse(buf, NULL); |
1733 | if (!disksize) | |
1734 | return -EINVAL; | |
2f6a3bed | 1735 | |
beb6602c MK |
1736 | down_write(&zram->init_lock); |
1737 | if (init_done(zram)) { | |
1738 | pr_info("Cannot change disksize for initialized device\n"); | |
1739 | err = -EBUSY; | |
1740 | goto out_unlock; | |
1741 | } | |
1742 | ||
522698d7 | 1743 | disksize = PAGE_ALIGN(disksize); |
beb6602c MK |
1744 | if (!zram_meta_alloc(zram, disksize)) { |
1745 | err = -ENOMEM; | |
1746 | goto out_unlock; | |
1747 | } | |
522698d7 | 1748 | |
da9556a2 | 1749 | comp = zcomp_create(zram->compressor); |
522698d7 | 1750 | if (IS_ERR(comp)) { |
70864969 | 1751 | pr_err("Cannot initialise %s compressing backend\n", |
522698d7 SS |
1752 | zram->compressor); |
1753 | err = PTR_ERR(comp); | |
1754 | goto out_free_meta; | |
1755 | } | |
1756 | ||
522698d7 SS |
1757 | zram->comp = comp; |
1758 | zram->disksize = disksize; | |
1759 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); | |
e447a015 MK |
1760 | |
1761 | revalidate_disk(zram->disk); | |
e7ccfc4c | 1762 | up_write(&zram->init_lock); |
522698d7 SS |
1763 | |
1764 | return len; | |
1765 | ||
522698d7 | 1766 | out_free_meta: |
beb6602c MK |
1767 | zram_meta_free(zram, disksize); |
1768 | out_unlock: | |
1769 | up_write(&zram->init_lock); | |
522698d7 | 1770 | return err; |
2f6a3bed SS |
1771 | } |
1772 | ||
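/*
 * Writing a non-zero value to the reset attribute fully resets an
 * unused device. The device is temporarily claimed so that it cannot
 * be opened while pending I/O is flushed and zram_reset_device() runs.
 */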
522698d7 SS |
1773 | static ssize_t reset_store(struct device *dev, |
1774 | struct device_attribute *attr, const char *buf, size_t len) | |
4f2109f6 | 1775 | { |
522698d7 SS |
1776 | int ret; |
1777 | unsigned short do_reset; | |
1778 | struct zram *zram; | |
1779 | struct block_device *bdev; | |
4f2109f6 | 1780 | |
f405c445 SS |
1781 | ret = kstrtou16(buf, 10, &do_reset); |
1782 | if (ret) | |
1783 | return ret; | |
1784 | ||
1785 | if (!do_reset) | |
1786 | return -EINVAL; | |
1787 | ||
522698d7 SS |
1788 | zram = dev_to_zram(dev); |
1789 | bdev = bdget_disk(zram->disk, 0); | |
522698d7 SS |
1790 | if (!bdev) |
1791 | return -ENOMEM; | |
4f2109f6 | 1792 | |
522698d7 | 1793 | mutex_lock(&bdev->bd_mutex); |
f405c445 SS |
1794 | /* Do not reset an active device or claimed device */ |
1795 | if (bdev->bd_openers || zram->claim) { | |
1796 | mutex_unlock(&bdev->bd_mutex); | |
1797 | bdput(bdev); | |
1798 | return -EBUSY; | |
522698d7 SS |
1799 | } |
1800 | ||
f405c445 SS |
1801 | /* From now on, no one can open /dev/zram[0-9] */ |
1802 | zram->claim = true; | |
1803 | mutex_unlock(&bdev->bd_mutex); | |
522698d7 | 1804 | |
f405c445 | 1805 | /* Make sure all pending I/O is finished */ |
522698d7 SS |
1806 | fsync_bdev(bdev); |
1807 | zram_reset_device(zram); | |
e447a015 | 1808 | revalidate_disk(zram->disk); |
522698d7 SS |
1809 | bdput(bdev); |
1810 | ||
f405c445 SS |
1811 | mutex_lock(&bdev->bd_mutex); |
1812 | zram->claim = false; | |
1813 | mutex_unlock(&bdev->bd_mutex); | |
1814 | ||
522698d7 | 1815 | return len; |
f405c445 SS |
1816 | } |
1817 | ||
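/* Opening the device fails while it is claimed for reset or removal. */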
1818 | static int zram_open(struct block_device *bdev, fmode_t mode) | |
1819 | { | |
1820 | int ret = 0; | |
1821 | struct zram *zram; | |
1822 | ||
1823 | WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); | |
1824 | ||
1825 | zram = bdev->bd_disk->private_data; | |
1826 | /* zram was claimed for reset, so the open request fails */ |
1827 | if (zram->claim) | |
1828 | ret = -EBUSY; | |
4f2109f6 SS |
1829 | |
1830 | return ret; | |
1831 | } | |
1832 | ||
522698d7 | 1833 | static const struct block_device_operations zram_devops = { |
f405c445 | 1834 | .open = zram_open, |
522698d7 SS |
1835 | .swap_slot_free_notify = zram_slot_free_notify, |
1836 | .rw_page = zram_rw_page, | |
1837 | .owner = THIS_MODULE | |
1838 | }; | |
1839 | ||
1840 | static DEVICE_ATTR_WO(compact); | |
1841 | static DEVICE_ATTR_RW(disksize); | |
1842 | static DEVICE_ATTR_RO(initstate); | |
1843 | static DEVICE_ATTR_WO(reset); | |
c87d1655 SS |
1844 | static DEVICE_ATTR_WO(mem_limit); |
1845 | static DEVICE_ATTR_WO(mem_used_max); | |
e82592c4 | 1846 | static DEVICE_ATTR_WO(idle); |
522698d7 SS |
1847 | static DEVICE_ATTR_RW(max_comp_streams); |
1848 | static DEVICE_ATTR_RW(comp_algorithm); | |
013bf95a MK |
1849 | #ifdef CONFIG_ZRAM_WRITEBACK |
1850 | static DEVICE_ATTR_RW(backing_dev); | |
a939888e | 1851 | static DEVICE_ATTR_WO(writeback); |
bb416d18 | 1852 | static DEVICE_ATTR_RW(writeback_limit); |
1d69a3f8 | 1853 | static DEVICE_ATTR_RW(writeback_limit_enable); |
013bf95a | 1854 | #endif |
a68eb3b6 | 1855 | |
9b3bb7ab SS |
1856 | static struct attribute *zram_disk_attrs[] = { |
1857 | &dev_attr_disksize.attr, | |
1858 | &dev_attr_initstate.attr, | |
1859 | &dev_attr_reset.attr, | |
99ebbd30 | 1860 | &dev_attr_compact.attr, |
9ada9da9 | 1861 | &dev_attr_mem_limit.attr, |
461a8eee | 1862 | &dev_attr_mem_used_max.attr, |
e82592c4 | 1863 | &dev_attr_idle.attr, |
beca3ec7 | 1864 | &dev_attr_max_comp_streams.attr, |
e46b8a03 | 1865 | &dev_attr_comp_algorithm.attr, |
013bf95a MK |
1866 | #ifdef CONFIG_ZRAM_WRITEBACK |
1867 | &dev_attr_backing_dev.attr, | |
a939888e | 1868 | &dev_attr_writeback.attr, |
bb416d18 | 1869 | &dev_attr_writeback_limit.attr, |
1d69a3f8 | 1870 | &dev_attr_writeback_limit_enable.attr, |
013bf95a | 1871 | #endif |
2f6a3bed | 1872 | &dev_attr_io_stat.attr, |
4f2109f6 | 1873 | &dev_attr_mm_stat.attr, |
23eddf39 MK |
1874 | #ifdef CONFIG_ZRAM_WRITEBACK |
1875 | &dev_attr_bd_stat.attr, | |
1876 | #endif | |
623e47fc | 1877 | &dev_attr_debug_stat.attr, |
9b3bb7ab SS |
1878 | NULL, |
1879 | }; | |
1880 | ||
bc1bb362 | 1881 | static const struct attribute_group zram_disk_attr_group = { |
9b3bb7ab SS |
1882 | .attrs = zram_disk_attrs, |
1883 | }; | |
1884 | ||
98af4d4d HR |
1885 | static const struct attribute_group *zram_disk_attr_groups[] = { |
1886 | &zram_disk_attr_group, | |
1887 | NULL, | |
1888 | }; | |
1889 | ||
92ff1528 SS |
1890 | /* |
1891 | * Allocate and initialize a new zram device. The function returns |
1892 | * a device_id (>= 0) upon success, and a negative value otherwise. |
1893 | */ | |
1894 | static int zram_add(void) | |
306b0c95 | 1895 | { |
85508ec6 | 1896 | struct zram *zram; |
ee980160 | 1897 | struct request_queue *queue; |
92ff1528 | 1898 | int ret, device_id; |
85508ec6 SS |
1899 | |
1900 | zram = kzalloc(sizeof(struct zram), GFP_KERNEL); | |
1901 | if (!zram) | |
1902 | return -ENOMEM; | |
1903 | ||
92ff1528 | 1904 | ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); |
85508ec6 SS |
1905 | if (ret < 0) |
1906 | goto out_free_dev; | |
92ff1528 | 1907 | device_id = ret; |
de1a21a0 | 1908 | |
0900beae | 1909 | init_rwsem(&zram->init_lock); |
1d69a3f8 MK |
1910 | #ifdef CONFIG_ZRAM_WRITEBACK |
1911 | spin_lock_init(&zram->wb_limit_lock); | |
1912 | #endif | |
ee980160 SS |
1913 | queue = blk_alloc_queue(GFP_KERNEL); |
1914 | if (!queue) { | |
306b0c95 NG |
1915 | pr_err("Error allocating disk queue for device %d\n", |
1916 | device_id); | |
85508ec6 SS |
1917 | ret = -ENOMEM; |
1918 | goto out_free_idr; | |
306b0c95 NG |
1919 | } |
1920 | ||
ee980160 | 1921 | blk_queue_make_request(queue, zram_make_request); |
306b0c95 | 1922 | |
85508ec6 | 1923 | /* gendisk structure */ |
f1e3cfff NG |
1924 | zram->disk = alloc_disk(1); |
1925 | if (!zram->disk) { | |
70864969 | 1926 | pr_err("Error allocating disk structure for device %d\n", |
306b0c95 | 1927 | device_id); |
201c7b72 | 1928 | ret = -ENOMEM; |
39a9b8ac | 1929 | goto out_free_queue; |
306b0c95 NG |
1930 | } |
1931 | ||
f1e3cfff NG |
1932 | zram->disk->major = zram_major; |
1933 | zram->disk->first_minor = device_id; | |
1934 | zram->disk->fops = &zram_devops; | |
ee980160 SS |
1935 | zram->disk->queue = queue; |
1936 | zram->disk->queue->queuedata = zram; | |
f1e3cfff NG |
1937 | zram->disk->private_data = zram; |
1938 | snprintf(zram->disk->disk_name, 16, "zram%d", device_id); | |
306b0c95 | 1939 | |
33863c21 | 1940 | /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */ |
f1e3cfff | 1941 | set_capacity(zram->disk, 0); |
b67d1ec1 | 1942 | /* zram devices sort of resemble non-rotational disks */ |
8b904b5b BVA |
1943 | blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); |
1944 | blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); | |
e447a015 | 1945 | |
a1dd52af NG |
1946 | /* |
1947 | * To ensure that we always get PAGE_SIZE-aligned |
1948 | * and n*PAGE_SIZE-sized I/O requests. |
1949 | */ | |
f1e3cfff | 1950 | blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); |
7b19b8d4 RJ |
1951 | blk_queue_logical_block_size(zram->disk->queue, |
1952 | ZRAM_LOGICAL_BLOCK_SIZE); | |
f1e3cfff NG |
1953 | blk_queue_io_min(zram->disk->queue, PAGE_SIZE); |
1954 | blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); | |
f4659d8e | 1955 | zram->disk->queue->limits.discard_granularity = PAGE_SIZE; |
2bb4cd5c | 1956 | blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); |
8b904b5b | 1957 | blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); |
31edeacd | 1958 | |
f4659d8e JK |
1959 | /* |
1960 | * zram_bio_discard() will clear all logical blocks if the logical block |
1961 | * size is identical to the physical block size (PAGE_SIZE). But if it is |
1962 | * different, we will skip discarding some parts of logical blocks in | |
1963 | * the part of the request range which isn't aligned to physical block | |
1964 | * size. So we can't ensure that all discarded logical blocks are | |
1965 | * zeroed. | |
1966 | */ | |
1967 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | |
31edeacd | 1968 | blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); |
5d83d5a0 | 1969 | |
e447a015 | 1970 | zram->disk->queue->backing_dev_info->capabilities |= |
23c47d2a | 1971 | (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO); |
98af4d4d HR |
1972 | device_add_disk(NULL, zram->disk, zram_disk_attr_groups); |
1973 | ||
e46b8a03 | 1974 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
d12b63c9 | 1975 | |
c0265342 | 1976 | zram_debugfs_register(zram); |
d12b63c9 | 1977 | pr_info("Added device: %s\n", zram->disk->disk_name); |
92ff1528 | 1978 | return device_id; |
de1a21a0 | 1979 | |
39a9b8ac | 1980 | out_free_queue: |
ee980160 | 1981 | blk_cleanup_queue(queue); |
85508ec6 SS |
1982 | out_free_idr: |
1983 | idr_remove(&zram_index_idr, device_id); | |
1984 | out_free_dev: | |
1985 | kfree(zram); | |
de1a21a0 | 1986 | return ret; |
306b0c95 NG |
1987 | } |
1988 | ||
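/*
 * Remove a device that is neither opened nor claimed: reset it, delete
 * the gendisk, clean up the request queue and free the zram structure.
 */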
6566d1a3 | 1989 | static int zram_remove(struct zram *zram) |
306b0c95 | 1990 | { |
6566d1a3 SS |
1991 | struct block_device *bdev; |
1992 | ||
1993 | bdev = bdget_disk(zram->disk, 0); | |
1994 | if (!bdev) | |
1995 | return -ENOMEM; | |
1996 | ||
1997 | mutex_lock(&bdev->bd_mutex); | |
1998 | if (bdev->bd_openers || zram->claim) { | |
1999 | mutex_unlock(&bdev->bd_mutex); | |
2000 | bdput(bdev); | |
2001 | return -EBUSY; | |
2002 | } | |
2003 | ||
2004 | zram->claim = true; | |
2005 | mutex_unlock(&bdev->bd_mutex); | |
2006 | ||
c0265342 | 2007 | zram_debugfs_unregister(zram); |
306b0c95 | 2008 | |
6566d1a3 SS |
2009 | /* Make sure all pending I/O is finished */ |
2010 | fsync_bdev(bdev); | |
85508ec6 | 2011 | zram_reset_device(zram); |
6566d1a3 SS |
2012 | bdput(bdev); |
2013 | ||
2014 | pr_info("Removed device: %s\n", zram->disk->disk_name); | |
2015 | ||
85508ec6 | 2016 | del_gendisk(zram->disk); |
392db380 | 2017 | blk_cleanup_queue(zram->disk->queue); |
85508ec6 SS |
2018 | put_disk(zram->disk); |
2019 | kfree(zram); | |
6566d1a3 SS |
2020 | return 0; |
2021 | } | |
2022 | ||
2023 | /* zram-control sysfs attributes */ | |
27104a53 GKH |
2024 | |
2025 | /* | |
2026 | * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the |
2027 | * sense that reading from this file does alter the state of your system -- it |
2028 | * creates a new, un-initialized zram device and returns that device's |
2029 | * device_id (or an error code if it fails to create a new device). |
2030 | */ | |
6566d1a3 SS |
2031 | static ssize_t hot_add_show(struct class *class, |
2032 | struct class_attribute *attr, | |
2033 | char *buf) | |
2034 | { | |
2035 | int ret; | |
2036 | ||
2037 | mutex_lock(&zram_index_mutex); | |
2038 | ret = zram_add(); | |
2039 | mutex_unlock(&zram_index_mutex); | |
2040 | ||
2041 | if (ret < 0) | |
2042 | return ret; | |
2043 | return scnprintf(buf, PAGE_SIZE, "%d\n", ret); | |
2044 | } | |
f40609d1 | 2045 | static CLASS_ATTR_RO(hot_add); |
6566d1a3 SS |
2046 | |
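/*
 * Writing a device_id to hot_remove removes the matching unused zram
 * device and releases its id back to the idr allocator.
 */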
2047 | static ssize_t hot_remove_store(struct class *class, | |
2048 | struct class_attribute *attr, | |
2049 | const char *buf, | |
2050 | size_t count) | |
2051 | { | |
2052 | struct zram *zram; | |
2053 | int ret, dev_id; | |
2054 | ||
2055 | /* dev_id is gendisk->first_minor, which is `int' */ | |
2056 | ret = kstrtoint(buf, 10, &dev_id); | |
2057 | if (ret) | |
2058 | return ret; | |
2059 | if (dev_id < 0) | |
2060 | return -EINVAL; | |
2061 | ||
2062 | mutex_lock(&zram_index_mutex); | |
2063 | ||
2064 | zram = idr_find(&zram_index_idr, dev_id); | |
17ec4cd9 | 2065 | if (zram) { |
6566d1a3 | 2066 | ret = zram_remove(zram); |
529e71e1 TI |
2067 | if (!ret) |
2068 | idr_remove(&zram_index_idr, dev_id); | |
17ec4cd9 | 2069 | } else { |
6566d1a3 | 2070 | ret = -ENODEV; |
17ec4cd9 | 2071 | } |
6566d1a3 SS |
2072 | |
2073 | mutex_unlock(&zram_index_mutex); | |
2074 | return ret ? ret : count; | |
85508ec6 | 2075 | } |
27104a53 | 2076 | static CLASS_ATTR_WO(hot_remove); |
a096cafc | 2077 | |
27104a53 GKH |
2078 | static struct attribute *zram_control_class_attrs[] = { |
2079 | &class_attr_hot_add.attr, | |
2080 | &class_attr_hot_remove.attr, | |
2081 | NULL, | |
6566d1a3 | 2082 | }; |
27104a53 | 2083 | ATTRIBUTE_GROUPS(zram_control_class); |
6566d1a3 SS |
2084 | |
2085 | static struct class zram_control_class = { | |
2086 | .name = "zram-control", | |
2087 | .owner = THIS_MODULE, | |
27104a53 | 2088 | .class_groups = zram_control_class_groups, |
6566d1a3 SS |
2089 | }; |
2090 | ||
85508ec6 SS |
2091 | static int zram_remove_cb(int id, void *ptr, void *data) |
2092 | { | |
2093 | zram_remove(ptr); | |
2094 | return 0; | |
2095 | } | |
a096cafc | 2096 | |
85508ec6 SS |
2097 | static void destroy_devices(void) |
2098 | { | |
6566d1a3 | 2099 | class_unregister(&zram_control_class); |
85508ec6 | 2100 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
c0265342 | 2101 | zram_debugfs_destroy(); |
85508ec6 | 2102 | idr_destroy(&zram_index_idr); |
a096cafc | 2103 | unregister_blkdev(zram_major, "zram"); |
1dd6c834 | 2104 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
306b0c95 NG |
2105 | } |
2106 | ||
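/*
 * Module init: set up the CPU hotplug state for per-CPU compression
 * streams, register the zram-control class and the zram block major,
 * then pre-create num_devices devices.
 */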
f1e3cfff | 2107 | static int __init zram_init(void) |
306b0c95 | 2108 | { |
92ff1528 | 2109 | int ret; |
306b0c95 | 2110 | |
1dd6c834 AMG |
2111 | ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", |
2112 | zcomp_cpu_up_prepare, zcomp_cpu_dead); | |
2113 | if (ret < 0) | |
2114 | return ret; | |
2115 | ||
6566d1a3 SS |
2116 | ret = class_register(&zram_control_class); |
2117 | if (ret) { | |
70864969 | 2118 | pr_err("Unable to register zram-control class\n"); |
1dd6c834 | 2119 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
6566d1a3 SS |
2120 | return ret; |
2121 | } | |
2122 | ||
c0265342 | 2123 | zram_debugfs_create(); |
f1e3cfff NG |
2124 | zram_major = register_blkdev(0, "zram"); |
2125 | if (zram_major <= 0) { | |
70864969 | 2126 | pr_err("Unable to get major number\n"); |
6566d1a3 | 2127 | class_unregister(&zram_control_class); |
1dd6c834 | 2128 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
a096cafc | 2129 | return -EBUSY; |
306b0c95 NG |
2130 | } |
2131 | ||
92ff1528 | 2132 | while (num_devices != 0) { |
6566d1a3 | 2133 | mutex_lock(&zram_index_mutex); |
92ff1528 | 2134 | ret = zram_add(); |
6566d1a3 | 2135 | mutex_unlock(&zram_index_mutex); |
92ff1528 | 2136 | if (ret < 0) |
a096cafc | 2137 | goto out_error; |
92ff1528 | 2138 | num_devices--; |
de1a21a0 NG |
2139 | } |
2140 | ||
306b0c95 | 2141 | return 0; |
de1a21a0 | 2142 | |
a096cafc | 2143 | out_error: |
85508ec6 | 2144 | destroy_devices(); |
306b0c95 NG |
2145 | return ret; |
2146 | } | |
2147 | ||
f1e3cfff | 2148 | static void __exit zram_exit(void) |
306b0c95 | 2149 | { |
85508ec6 | 2150 | destroy_devices(); |
306b0c95 NG |
2151 | } |
2152 | ||
f1e3cfff NG |
2153 | module_init(zram_init); |
2154 | module_exit(zram_exit); | |
306b0c95 | 2155 | |
9b3bb7ab | 2156 | module_param(num_devices, uint, 0); |
c3cdb40e | 2157 | MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); |
9b3bb7ab | 2158 | |
306b0c95 NG |
2159 | MODULE_LICENSE("Dual BSD/GPL"); |
2160 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); | |
f1e3cfff | 2161 | MODULE_DESCRIPTION("Compressed RAM Block Device"); |