Commit | Line | Data |
---|---|---|
306b0c95 | 1 | /* |
f1e3cfff | 2 | * Compressed RAM block device |
306b0c95 | 3 | * |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
7bfb3de8 | 5 | * 2012, 2013 Minchan Kim |
306b0c95 NG | 6 | * |
7 | * This code is released using a dual license strategy: BSD/GPL | |
8 | * You can choose the licence that better fits your requirements. | |
9 | * | |
10 | * Released under the terms of 3-clause BSD License | |
11 | * Released under the terms of GNU General Public License Version 2.0 | |
12 | * | |
306b0c95 NG | 13 | */ |
14 | ||
f1e3cfff | 15 | #define KMSG_COMPONENT "zram" |
306b0c95 NG | 16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
17 | ||
18 | #include <linux/module.h> | |
19 | #include <linux/kernel.h> | |
8946a086 | 20 | #include <linux/bio.h> |
306b0c95 NG | 21 | #include <linux/bitops.h> |
22 | #include <linux/blkdev.h> | |
23 | #include <linux/buffer_head.h> | |
24 | #include <linux/device.h> | |
306b0c95 | 25 | #include <linux/highmem.h> |
5a0e3ad6 | 26 | #include <linux/slab.h> |
b09ab054 | 27 | #include <linux/backing-dev.h> |
306b0c95 | 28 | #include <linux/string.h> |
306b0c95 | 29 | #include <linux/vmalloc.h> |
fcfa8d95 | 30 | #include <linux/err.h> |
85508ec6 | 31 | #include <linux/idr.h> |
6566d1a3 | 32 | #include <linux/sysfs.h> |
c0265342 | 33 | #include <linux/debugfs.h> |
1dd6c834 | 34 | #include <linux/cpuhotplug.h> |
c6a564ff | 35 | #include <linux/part_stat.h> |
306b0c95 | 36 | |
16a4bfb9 | 37 | #include "zram_drv.h" |
306b0c95 | 38 | |
85508ec6 | 39 | static DEFINE_IDR(zram_index_idr); |
6566d1a3 SS | 40 | /* idr index must be protected */ |
41 | static DEFINE_MUTEX(zram_index_mutex); | |
42 | ||
f1e3cfff | 43 | static int zram_major; |
3d711a38 | 44 | static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; |
306b0c95 | 45 | |
306b0c95 | 46 | /* Module params (documentation at end) */ |
ca3d70bd | 47 | static unsigned int num_devices = 1; |
60f5921a SS | 48 | /* |
49 | * Pages that compress to a size equal to or greater than this are stored | |
50 | * uncompressed in memory. | |
51 | */ | |
52 | static size_t huge_class_size; | |
33863c21 | 53 | |
a8b456d0 | 54 | static const struct block_device_operations zram_devops; |
13c1c74a | 55 | #ifdef CONFIG_ZRAM_WRITEBACK |
a8b456d0 | 56 | static const struct block_device_operations zram_wb_devops; |
13c1c74a | 57 | #endif |
a8b456d0 | 58 | |
1f7319c7 | 59 | static void zram_free_page(struct zram *zram, size_t index); |
a939888e MK | 60 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
61 | u32 index, int offset, struct bio *bio); | |
62 | ||
1f7319c7 | 63 | |
3c9959e0 MK | 64 | static int zram_slot_trylock(struct zram *zram, u32 index) |
65 | { | |
7e529283 | 66 | return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); |
3c9959e0 MK | 67 | } |
68 | ||
c4d6c4cc MK | 69 | static void zram_slot_lock(struct zram *zram, u32 index) |
70 | { | |
7e529283 | 71 | bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); |
c4d6c4cc MK | 72 | } |
73 | ||
74 | static void zram_slot_unlock(struct zram *zram, u32 index) | |
75 | { | |
7e529283 | 76 | bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); |
c4d6c4cc MK | 77 | } |
78 | ||
08eee69f | 79 | static inline bool init_done(struct zram *zram) |
be2d1d56 | 80 | { |
08eee69f | 81 | return zram->disksize; |
be2d1d56 SS | 82 | } |
83 | ||
9b3bb7ab SS | 84 | static inline struct zram *dev_to_zram(struct device *dev) |
85 | { | |
86 | return (struct zram *)dev_to_disk(dev)->private_data; | |
87 | } | |
88 | ||
643ae61d MK | 89 | static unsigned long zram_get_handle(struct zram *zram, u32 index) |
90 | { | |
91 | return zram->table[index].handle; | |
92 | } | |
93 | ||
94 | static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) | |
95 | { | |
96 | zram->table[index].handle = handle; | |
97 | } | |
98 | ||
b31177f2 | 99 | /* flag operations require table entry bit_spin_lock() being held */ |
c0265342 | 100 | static bool zram_test_flag(struct zram *zram, u32 index, |
522698d7 | 101 | enum zram_pageflags flag) |
99ebbd30 | 102 | { |
7e529283 | 103 | return zram->table[index].flags & BIT(flag); |
522698d7 | 104 | } |
99ebbd30 | 105 | |
beb6602c | 106 | static void zram_set_flag(struct zram *zram, u32 index, |
522698d7 SS |
107 | enum zram_pageflags flag) |
108 | { | |
7e529283 | 109 | zram->table[index].flags |= BIT(flag); |
522698d7 | 110 | } |
99ebbd30 | 111 | |
beb6602c | 112 | static void zram_clear_flag(struct zram *zram, u32 index, |
522698d7 SS |
113 | enum zram_pageflags flag) |
114 | { | |
7e529283 | 115 | zram->table[index].flags &= ~BIT(flag); |
522698d7 | 116 | } |
99ebbd30 | 117 | |
beb6602c | 118 | static inline void zram_set_element(struct zram *zram, u32 index, |
8e19d540 | 119 | unsigned long element) |
120 | { | |
beb6602c | 121 | zram->table[index].element = element; |
8e19d540 | 122 | } |
123 | ||
643ae61d | 124 | static unsigned long zram_get_element(struct zram *zram, u32 index) |
8e19d540 | 125 | { |
643ae61d | 126 | return zram->table[index].element; |
8e19d540 | 127 | } |
128 | ||
beb6602c | 129 | static size_t zram_get_obj_size(struct zram *zram, u32 index) |
522698d7 | 130 | { |
7e529283 | 131 | return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); |
99ebbd30 AM |
132 | } |
133 | ||
beb6602c | 134 | static void zram_set_obj_size(struct zram *zram, |
522698d7 | 135 | u32 index, size_t size) |
9b3bb7ab | 136 | { |
7e529283 | 137 | unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; |
9b3bb7ab | 138 | |
7e529283 | 139 | zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; |
522698d7 SS |
140 | } |
141 | ||
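A minimal userspace sketch of the flags-word packing used by zram_set_obj_size()/zram_get_obj_size() above: the low ZRAM_FLAG_SHIFT bits of the per-slot flags word hold the compressed object size and the zram_pageflags bits live above them. FLAG_SHIFT below is an assumed stand-in for ZRAM_FLAG_SHIFT (defined in zram_drv.h), not the driver's actual value.

	#include <stdio.h>

	#define FLAG_SHIFT 24	/* assumption; the real ZRAM_FLAG_SHIFT lives in zram_drv.h */

	int main(void)
	{
		unsigned long flags = 0;

		/* zram_set_obj_size(): keep the flag bits, overwrite the size field */
		flags = ((flags >> FLAG_SHIFT) << FLAG_SHIFT) | 1234;
		/* zram_set_flag(): set a bit above the size field */
		flags |= 1UL << (FLAG_SHIFT + 2);

		printf("size=%lu flag=%d\n",
		       flags & ((1UL << FLAG_SHIFT) - 1),	/* zram_get_obj_size() -> 1234 */
		       !!(flags & (1UL << (FLAG_SHIFT + 2))));	/* zram_test_flag() -> 1 */
		return 0;
	}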
a939888e MK |
142 | static inline bool zram_allocated(struct zram *zram, u32 index) |
143 | { | |
144 | return zram_get_obj_size(zram, index) || | |
145 | zram_test_flag(zram, index, ZRAM_SAME) || | |
146 | zram_test_flag(zram, index, ZRAM_WB); | |
147 | } | |
148 | ||
1f7319c7 | 149 | #if PAGE_SIZE != 4096 |
1c53e0d2 | 150 | static inline bool is_partial_io(struct bio_vec *bvec) |
522698d7 SS |
151 | { |
152 | return bvec->bv_len != PAGE_SIZE; | |
153 | } | |
1f7319c7 MK |
154 | #else |
155 | static inline bool is_partial_io(struct bio_vec *bvec) | |
156 | { | |
157 | return false; | |
158 | } | |
159 | #endif | |
522698d7 SS |
160 | |
161 | /* | |
162 | * Check if request is within bounds and aligned on zram logical blocks. | |
163 | */ | |
1c53e0d2 | 164 | static inline bool valid_io_request(struct zram *zram, |
522698d7 SS |
165 | sector_t start, unsigned int size) |
166 | { | |
167 | u64 end, bound; | |
168 | ||
169 | /* unaligned request */ | |
170 | if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) | |
1c53e0d2 | 171 | return false; |
522698d7 | 172 | if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) |
1c53e0d2 | 173 | return false; |
522698d7 SS |
174 | |
175 | end = start + (size >> SECTOR_SHIFT); | |
176 | bound = zram->disksize >> SECTOR_SHIFT; | |
177 | /* out of range */ | |
178 | if (unlikely(start >= bound || end > bound || start > end)) | |
1c53e0d2 | 179 | return false; |
522698d7 SS |
180 | |
181 | /* I/O request is valid */ | |
1c53e0d2 | 182 | return true; |
522698d7 SS |
183 | } |
184 | ||
185 | static void update_position(u32 *index, int *offset, struct bio_vec *bvec) | |
186 | { | |
e86942c7 | 187 | *index += (*offset + bvec->bv_len) / PAGE_SIZE; |
522698d7 SS |
188 | *offset = (*offset + bvec->bv_len) % PAGE_SIZE; |
189 | } | |
190 | ||
191 | static inline void update_used_max(struct zram *zram, | |
192 | const unsigned long pages) | |
193 | { | |
194 | unsigned long old_max, cur_max; | |
195 | ||
196 | old_max = atomic_long_read(&zram->stats.max_used_pages); | |
197 | ||
198 | do { | |
199 | cur_max = old_max; | |
200 | if (pages > cur_max) | |
201 | old_max = atomic_long_cmpxchg( | |
202 | &zram->stats.max_used_pages, cur_max, pages); | |
203 | } while (old_max != cur_max); | |
204 | } | |
205 | ||
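update_used_max() above is a lockless "running maximum": it re-reads the current maximum and retries the compare-and-swap until either its value is no longer a new maximum or the swap succeeds. A small userspace sketch of the same pattern, using C11 atomics in place of the kernel's atomic_long_cmpxchg():

	#include <stdatomic.h>

	static _Atomic long max_used_pages;

	/* Raise max_used_pages to 'pages' if (and only if) it is a new maximum. */
	static void update_used_max_sketch(long pages)
	{
		long cur = atomic_load(&max_used_pages);

		/* compare_exchange_weak reloads 'cur' on failure, so the loop ends as
		 * soon as 'pages' is not a new maximum or the swap has succeeded. */
		while (pages > cur &&
		       !atomic_compare_exchange_weak(&max_used_pages, &cur, pages))
			;
	}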
48ad1abe | 206 | static inline void zram_fill_page(void *ptr, unsigned long len, |
8e19d540 | 207 | unsigned long value) |
208 | { | |
8e19d540 | 209 | WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); |
48ad1abe | 210 | memset_l(ptr, value, len / sizeof(unsigned long)); |
8e19d540 | 211 | } |
212 | ||
213 | static bool page_same_filled(void *ptr, unsigned long *element) | |
522698d7 | 214 | { |
522698d7 | 215 | unsigned long *page; |
f0fe9984 | 216 | unsigned long val; |
90f82cbf | 217 | unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; |
522698d7 SS |
218 | |
219 | page = (unsigned long *)ptr; | |
f0fe9984 | 220 | val = page[0]; |
522698d7 | 221 | |
90f82cbf TS |
222 | if (val != page[last_pos]) |
223 | return false; | |
224 | ||
225 | for (pos = 1; pos < last_pos; pos++) { | |
f0fe9984 | 226 | if (val != page[pos]) |
1c53e0d2 | 227 | return false; |
522698d7 SS |
228 | } |
229 | ||
f0fe9984 | 230 | *element = val; |
8e19d540 | 231 | |
1c53e0d2 | 232 | return true; |
522698d7 SS |
233 | } |
234 | ||
9b3bb7ab SS |
235 | static ssize_t initstate_show(struct device *dev, |
236 | struct device_attribute *attr, char *buf) | |
237 | { | |
a68eb3b6 | 238 | u32 val; |
9b3bb7ab SS |
239 | struct zram *zram = dev_to_zram(dev); |
240 | ||
a68eb3b6 SS |
241 | down_read(&zram->init_lock); |
242 | val = init_done(zram); | |
243 | up_read(&zram->init_lock); | |
9b3bb7ab | 244 | |
56b4e8cb | 245 | return scnprintf(buf, PAGE_SIZE, "%u\n", val); |
9b3bb7ab SS |
246 | } |
247 | ||
522698d7 SS |
248 | static ssize_t disksize_show(struct device *dev, |
249 | struct device_attribute *attr, char *buf) | |
250 | { | |
251 | struct zram *zram = dev_to_zram(dev); | |
252 | ||
253 | return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); | |
254 | } | |
255 | ||
9ada9da9 MK |
256 | static ssize_t mem_limit_store(struct device *dev, |
257 | struct device_attribute *attr, const char *buf, size_t len) | |
258 | { | |
259 | u64 limit; | |
260 | char *tmp; | |
261 | struct zram *zram = dev_to_zram(dev); | |
262 | ||
263 | limit = memparse(buf, &tmp); | |
264 | if (buf == tmp) /* no chars parsed, invalid input */ | |
265 | return -EINVAL; | |
266 | ||
267 | down_write(&zram->init_lock); | |
268 | zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | |
269 | up_write(&zram->init_lock); | |
270 | ||
271 | return len; | |
272 | } | |
273 | ||
461a8eee MK |
274 | static ssize_t mem_used_max_store(struct device *dev, |
275 | struct device_attribute *attr, const char *buf, size_t len) | |
276 | { | |
277 | int err; | |
278 | unsigned long val; | |
279 | struct zram *zram = dev_to_zram(dev); | |
461a8eee MK |
280 | |
281 | err = kstrtoul(buf, 10, &val); | |
282 | if (err || val != 0) | |
283 | return -EINVAL; | |
284 | ||
285 | down_read(&zram->init_lock); | |
5a99e95b | 286 | if (init_done(zram)) { |
461a8eee | 287 | atomic_long_set(&zram->stats.max_used_pages, |
beb6602c | 288 | zs_get_total_pages(zram->mem_pool)); |
5a99e95b | 289 | } |
461a8eee MK |
290 | up_read(&zram->init_lock); |
291 | ||
292 | return len; | |
293 | } | |
294 | ||
755804d1 BG |
295 | /* |
296 | * Mark all pages which are older than or equal to cutoff as IDLE. | |
297 | * Callers should hold the zram init lock in read mode | |
298 | */ | |
299 | static void mark_idle(struct zram *zram, ktime_t cutoff) | |
e82592c4 | 300 | { |
755804d1 | 301 | int is_idle = 1; |
e82592c4 MK |
302 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; |
303 | int index; | |
e82592c4 | 304 | |
e82592c4 | 305 | for (index = 0; index < nr_pages; index++) { |
a939888e MK |
306 | /* |
307 | * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. | |
308 | * See the comment in writeback_store. | |
309 | */ | |
e82592c4 | 310 | zram_slot_lock(zram, index); |
1d69a3f8 | 311 | if (zram_allocated(zram, index) && |
755804d1 BG |
312 | !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { |
313 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING | |
314 | is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time); | |
315 | #endif | |
316 | if (is_idle) | |
317 | zram_set_flag(zram, index, ZRAM_IDLE); | |
318 | } | |
e82592c4 MK |
319 | zram_slot_unlock(zram, index); |
320 | } | |
755804d1 | 321 | } |
e82592c4 | 322 | |
755804d1 BG |
323 | static ssize_t idle_store(struct device *dev, |
324 | struct device_attribute *attr, const char *buf, size_t len) | |
325 | { | |
326 | struct zram *zram = dev_to_zram(dev); | |
327 | ktime_t cutoff_time = 0; | |
328 | ssize_t rv = -EINVAL; | |
e82592c4 | 329 | |
755804d1 BG |
330 | if (!sysfs_streq(buf, "all")) { |
331 | /* | |
332 | * If it did not parse as 'all' try to treat it as an integer when | |
333 | * we have memory tracking enabled. | |
334 | */ | |
335 | u64 age_sec; | |
336 | ||
337 | if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec)) | |
338 | cutoff_time = ktime_sub(ktime_get_boottime(), | |
339 | ns_to_ktime(age_sec * NSEC_PER_SEC)); | |
340 | else | |
341 | goto out; | |
342 | } | |
343 | ||
344 | down_read(&zram->init_lock); | |
345 | if (!init_done(zram)) | |
346 | goto out_unlock; | |
347 | ||
348 | /* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */ | |
349 | mark_idle(zram, cutoff_time); | |
350 | rv = len; | |
351 | ||
352 | out_unlock: | |
353 | up_read(&zram->init_lock); | |
354 | out: | |
355 | return rv; | |
e82592c4 MK |
356 | } |
357 | ||
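As the parsing above shows, idle_store accepts either the literal string "all" (cutoff_time stays 0, so mark_idle() flags every allocated slot that is not ZRAM_UNDER_WB as ZRAM_IDLE) or, when CONFIG_ZRAM_MEMORY_TRACKING is enabled, an age in seconds that is turned into a boottime cutoff so that only slots not accessed for at least that long are marked idle.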
013bf95a | 358 | #ifdef CONFIG_ZRAM_WRITEBACK |
1d69a3f8 MK |
359 | static ssize_t writeback_limit_enable_store(struct device *dev, |
360 | struct device_attribute *attr, const char *buf, size_t len) | |
361 | { | |
362 | struct zram *zram = dev_to_zram(dev); | |
363 | u64 val; | |
364 | ssize_t ret = -EINVAL; | |
365 | ||
366 | if (kstrtoull(buf, 10, &val)) | |
367 | return ret; | |
368 | ||
369 | down_read(&zram->init_lock); | |
370 | spin_lock(&zram->wb_limit_lock); | |
371 | zram->wb_limit_enable = val; | |
372 | spin_unlock(&zram->wb_limit_lock); | |
373 | up_read(&zram->init_lock); | |
374 | ret = len; | |
375 | ||
376 | return ret; | |
377 | } | |
378 | ||
379 | static ssize_t writeback_limit_enable_show(struct device *dev, | |
380 | struct device_attribute *attr, char *buf) | |
381 | { | |
382 | bool val; | |
383 | struct zram *zram = dev_to_zram(dev); | |
384 | ||
385 | down_read(&zram->init_lock); | |
386 | spin_lock(&zram->wb_limit_lock); | |
387 | val = zram->wb_limit_enable; | |
388 | spin_unlock(&zram->wb_limit_lock); | |
389 | up_read(&zram->init_lock); | |
390 | ||
391 | return scnprintf(buf, PAGE_SIZE, "%d\n", val); | |
392 | } | |
393 | ||
bb416d18 MK |
394 | static ssize_t writeback_limit_store(struct device *dev, |
395 | struct device_attribute *attr, const char *buf, size_t len) | |
396 | { | |
397 | struct zram *zram = dev_to_zram(dev); | |
398 | u64 val; | |
399 | ssize_t ret = -EINVAL; | |
400 | ||
401 | if (kstrtoull(buf, 10, &val)) | |
402 | return ret; | |
403 | ||
404 | down_read(&zram->init_lock); | |
1d69a3f8 MK |
405 | spin_lock(&zram->wb_limit_lock); |
406 | zram->bd_wb_limit = val; | |
407 | spin_unlock(&zram->wb_limit_lock); | |
bb416d18 MK |
408 | up_read(&zram->init_lock); |
409 | ret = len; | |
410 | ||
411 | return ret; | |
412 | } | |
413 | ||
414 | static ssize_t writeback_limit_show(struct device *dev, | |
415 | struct device_attribute *attr, char *buf) | |
416 | { | |
417 | u64 val; | |
418 | struct zram *zram = dev_to_zram(dev); | |
419 | ||
420 | down_read(&zram->init_lock); | |
1d69a3f8 MK |
421 | spin_lock(&zram->wb_limit_lock); |
422 | val = zram->bd_wb_limit; | |
423 | spin_unlock(&zram->wb_limit_lock); | |
bb416d18 MK |
424 | up_read(&zram->init_lock); |
425 | ||
426 | return scnprintf(buf, PAGE_SIZE, "%llu\n", val); | |
427 | } | |
428 | ||
013bf95a MK |
429 | static void reset_bdev(struct zram *zram) |
430 | { | |
431 | struct block_device *bdev; | |
432 | ||
7e529283 | 433 | if (!zram->backing_dev) |
013bf95a MK |
434 | return; |
435 | ||
436 | bdev = zram->bdev; | |
013bf95a MK |
437 | blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); |
438 | /* hope filp_close flushes all outstanding IO */ | |
439 | filp_close(zram->backing_dev, NULL); | |
440 | zram->backing_dev = NULL; | |
013bf95a | 441 | zram->bdev = NULL; |
a8b456d0 | 442 | zram->disk->fops = &zram_devops; |
1363d466 MK |
443 | kvfree(zram->bitmap); |
444 | zram->bitmap = NULL; | |
013bf95a MK |
445 | } |
446 | ||
447 | static ssize_t backing_dev_show(struct device *dev, | |
448 | struct device_attribute *attr, char *buf) | |
449 | { | |
f7daefe4 | 450 | struct file *file; |
013bf95a | 451 | struct zram *zram = dev_to_zram(dev); |
013bf95a MK |
452 | char *p; |
453 | ssize_t ret; | |
454 | ||
455 | down_read(&zram->init_lock); | |
f7daefe4 C |
456 | file = zram->backing_dev; |
457 | if (!file) { | |
013bf95a MK |
458 | memcpy(buf, "none\n", 5); |
459 | up_read(&zram->init_lock); | |
460 | return 5; | |
461 | } | |
462 | ||
463 | p = file_path(file, buf, PAGE_SIZE - 1); | |
464 | if (IS_ERR(p)) { | |
465 | ret = PTR_ERR(p); | |
466 | goto out; | |
467 | } | |
468 | ||
469 | ret = strlen(p); | |
470 | memmove(buf, p, ret); | |
471 | buf[ret++] = '\n'; | |
472 | out: | |
473 | up_read(&zram->init_lock); | |
474 | return ret; | |
475 | } | |
476 | ||
477 | static ssize_t backing_dev_store(struct device *dev, | |
478 | struct device_attribute *attr, const char *buf, size_t len) | |
479 | { | |
480 | char *file_name; | |
c8bd134a | 481 | size_t sz; |
013bf95a MK |
482 | struct file *backing_dev = NULL; |
483 | struct inode *inode; | |
484 | struct address_space *mapping; | |
ee763e21 | 485 | unsigned int bitmap_sz; |
1363d466 | 486 | unsigned long nr_pages, *bitmap = NULL; |
013bf95a MK |
487 | struct block_device *bdev = NULL; |
488 | int err; | |
489 | struct zram *zram = dev_to_zram(dev); | |
490 | ||
491 | file_name = kmalloc(PATH_MAX, GFP_KERNEL); | |
492 | if (!file_name) | |
493 | return -ENOMEM; | |
494 | ||
495 | down_write(&zram->init_lock); | |
496 | if (init_done(zram)) { | |
497 | pr_info("Can't setup backing device for initialized device\n"); | |
498 | err = -EBUSY; | |
499 | goto out; | |
500 | } | |
501 | ||
c8bd134a PK |
502 | strlcpy(file_name, buf, PATH_MAX); |
503 | /* ignore trailing newline */ | |
504 | sz = strlen(file_name); | |
505 | if (sz > 0 && file_name[sz - 1] == '\n') | |
506 | file_name[sz - 1] = 0x00; | |
013bf95a MK |
507 | |
508 | backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); | |
509 | if (IS_ERR(backing_dev)) { | |
510 | err = PTR_ERR(backing_dev); | |
511 | backing_dev = NULL; | |
512 | goto out; | |
513 | } | |
514 | ||
515 | mapping = backing_dev->f_mapping; | |
516 | inode = mapping->host; | |
517 | ||
518 | /* Only block devices are supported at the moment */ | |
519 | if (!S_ISBLK(inode->i_mode)) { | |
520 | err = -ENOTBLK; | |
521 | goto out; | |
522 | } | |
523 | ||
0fc66c9d CH |
524 | bdev = blkdev_get_by_dev(inode->i_rdev, |
525 | FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); | |
526 | if (IS_ERR(bdev)) { | |
527 | err = PTR_ERR(bdev); | |
5547932d | 528 | bdev = NULL; |
013bf95a | 529 | goto out; |
5547932d | 530 | } |
013bf95a | 531 | |
1363d466 MK |
532 | nr_pages = i_size_read(inode) >> PAGE_SHIFT; |
533 | bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); | |
534 | bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); | |
535 | if (!bitmap) { | |
536 | err = -ENOMEM; | |
537 | goto out; | |
538 | } | |
539 | ||
013bf95a MK |
540 | reset_bdev(zram); |
541 | ||
013bf95a MK |
542 | zram->bdev = bdev; |
543 | zram->backing_dev = backing_dev; | |
1363d466 MK |
544 | zram->bitmap = bitmap; |
545 | zram->nr_pages = nr_pages; | |
4f7a7bea MK | 546 | /* |
547 | * With the writeback feature, zram does asynchronous IO, so it is no longer | |
548 | * a synchronous device; remove the synchronous io flag. Otherwise, the | |
549 | * upper layer (e.g., swap) could wait for IO completion rather than | |
550 | * (submit and return), which would make the system sluggish. | |
551 | * Furthermore, when the IO function returns (e.g., swap_readpage), the | |
552 | * upper layer expects the IO to be done and may free the page, while | |
553 | * in fact the IO is still in flight, which could finally cause a | |
554 | * use-after-free once the IO really completes. | |
555 | */ | |
a8b456d0 | 556 | zram->disk->fops = &zram_wb_devops; |
013bf95a MK |
557 | up_write(&zram->init_lock); |
558 | ||
559 | pr_info("setup backing device %s\n", file_name); | |
560 | kfree(file_name); | |
561 | ||
562 | return len; | |
563 | out: | |
294ed6b9 | 564 | kvfree(bitmap); |
1363d466 | 565 | |
013bf95a MK |
566 | if (bdev) |
567 | blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); | |
568 | ||
569 | if (backing_dev) | |
570 | filp_close(backing_dev, NULL); | |
571 | ||
572 | up_write(&zram->init_lock); | |
573 | ||
574 | kfree(file_name); | |
575 | ||
576 | return err; | |
577 | } | |
578 | ||
7e529283 | 579 | static unsigned long alloc_block_bdev(struct zram *zram) |
1363d466 | 580 | { |
3c9959e0 MK |
581 | unsigned long blk_idx = 1; |
582 | retry: | |
1363d466 | 583 | /* skip bit 0 so a block index never collides with zram.handle == 0 */ |
3c9959e0 MK |
584 | blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); |
585 | if (blk_idx == zram->nr_pages) | |
1363d466 | 586 | return 0; |
1363d466 | 587 | |
3c9959e0 MK |
588 | if (test_and_set_bit(blk_idx, zram->bitmap)) |
589 | goto retry; | |
1363d466 | 590 | |
23eddf39 | 591 | atomic64_inc(&zram->stats.bd_count); |
3c9959e0 | 592 | return blk_idx; |
1363d466 MK |
593 | } |
594 | ||
7e529283 | 595 | static void free_block_bdev(struct zram *zram, unsigned long blk_idx) |
1363d466 MK |
596 | { |
597 | int was_set; | |
598 | ||
7e529283 | 599 | was_set = test_and_clear_bit(blk_idx, zram->bitmap); |
1363d466 | 600 | WARN_ON_ONCE(!was_set); |
23eddf39 | 601 | atomic64_dec(&zram->stats.bd_count); |
1363d466 MK |
602 | } |
603 | ||
384bc41f | 604 | static void zram_page_end_io(struct bio *bio) |
db8ffbd4 | 605 | { |
263663cd | 606 | struct page *page = bio_first_page_all(bio); |
db8ffbd4 MK |
607 | |
608 | page_endio(page, op_is_write(bio_op(bio)), | |
609 | blk_status_to_errno(bio->bi_status)); | |
610 | bio_put(bio); | |
611 | } | |
612 | ||
8e654f8f MK |
613 | /* |
614 | * Returns 1 if the submission is successful. | |
615 | */ | |
616 | static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, | |
617 | unsigned long entry, struct bio *parent) | |
618 | { | |
619 | struct bio *bio; | |
620 | ||
07888c66 CH |
621 | bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ, |
622 | GFP_NOIO); | |
8e654f8f MK |
623 | if (!bio) |
624 | return -ENOMEM; | |
625 | ||
626 | bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); | |
8e654f8f MK |
627 | if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { |
628 | bio_put(bio); | |
629 | return -EIO; | |
630 | } | |
631 | ||
07888c66 | 632 | if (!parent) |
8e654f8f | 633 | bio->bi_end_io = zram_page_end_io; |
07888c66 | 634 | else |
8e654f8f | 635 | bio_chain(bio, parent); |
8e654f8f MK |
636 | |
637 | submit_bio(bio); | |
638 | return 1; | |
639 | } | |
640 | ||
0d835962 MK |
641 | #define PAGE_WB_SIG "page_index=" |
642 | ||
643 | #define PAGE_WRITEBACK 0 | |
30226b69 BG |
644 | #define HUGE_WRITEBACK (1<<0) |
645 | #define IDLE_WRITEBACK (1<<1) | |
a939888e | 646 | |
0d835962 | 647 | |
a939888e MK |
648 | static ssize_t writeback_store(struct device *dev, |
649 | struct device_attribute *attr, const char *buf, size_t len) | |
650 | { | |
651 | struct zram *zram = dev_to_zram(dev); | |
652 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | |
0d835962 | 653 | unsigned long index = 0; |
a939888e MK |
654 | struct bio bio; |
655 | struct bio_vec bio_vec; | |
656 | struct page *page; | |
3b82a051 | 657 | ssize_t ret = len; |
57e0076e | 658 | int mode, err; |
a939888e MK |
659 | unsigned long blk_idx = 0; |
660 | ||
0bc9f5d1 | 661 | if (sysfs_streq(buf, "idle")) |
a939888e | 662 | mode = IDLE_WRITEBACK; |
0bc9f5d1 | 663 | else if (sysfs_streq(buf, "huge")) |
a939888e | 664 | mode = HUGE_WRITEBACK; |
30226b69 BG |
665 | else if (sysfs_streq(buf, "huge_idle")) |
666 | mode = IDLE_WRITEBACK | HUGE_WRITEBACK; | |
0d835962 MK |
667 | else { |
668 | if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) | |
669 | return -EINVAL; | |
670 | ||
2766f182 MK |
671 | if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || |
672 | index >= nr_pages) | |
0d835962 MK |
673 | return -EINVAL; |
674 | ||
675 | nr_pages = 1; | |
676 | mode = PAGE_WRITEBACK; | |
677 | } | |
a939888e MK |
678 | |
679 | down_read(&zram->init_lock); | |
680 | if (!init_done(zram)) { | |
681 | ret = -EINVAL; | |
682 | goto release_init_lock; | |
683 | } | |
684 | ||
685 | if (!zram->backing_dev) { | |
686 | ret = -ENODEV; | |
687 | goto release_init_lock; | |
688 | } | |
689 | ||
690 | page = alloc_page(GFP_KERNEL); | |
691 | if (!page) { | |
692 | ret = -ENOMEM; | |
693 | goto release_init_lock; | |
694 | } | |
695 | ||
2766f182 | 696 | for (; nr_pages != 0; index++, nr_pages--) { |
a939888e MK |
697 | struct bio_vec bvec; |
698 | ||
699 | bvec.bv_page = page; | |
700 | bvec.bv_len = PAGE_SIZE; | |
701 | bvec.bv_offset = 0; | |
702 | ||
1d69a3f8 MK |
703 | spin_lock(&zram->wb_limit_lock); |
704 | if (zram->wb_limit_enable && !zram->bd_wb_limit) { | |
705 | spin_unlock(&zram->wb_limit_lock); | |
bb416d18 MK |
706 | ret = -EIO; |
707 | break; | |
708 | } | |
1d69a3f8 | 709 | spin_unlock(&zram->wb_limit_lock); |
bb416d18 | 710 | |
a939888e MK |
711 | if (!blk_idx) { |
712 | blk_idx = alloc_block_bdev(zram); | |
713 | if (!blk_idx) { | |
714 | ret = -ENOSPC; | |
715 | break; | |
716 | } | |
717 | } | |
718 | ||
719 | zram_slot_lock(zram, index); | |
720 | if (!zram_allocated(zram, index)) | |
721 | goto next; | |
722 | ||
723 | if (zram_test_flag(zram, index, ZRAM_WB) || | |
724 | zram_test_flag(zram, index, ZRAM_SAME) || | |
725 | zram_test_flag(zram, index, ZRAM_UNDER_WB)) | |
726 | goto next; | |
727 | ||
30226b69 | 728 | if (mode & IDLE_WRITEBACK && |
1d69a3f8 MK |
729 | !zram_test_flag(zram, index, ZRAM_IDLE)) |
730 | goto next; | |
30226b69 | 731 | if (mode & HUGE_WRITEBACK && |
1d69a3f8 | 732 | !zram_test_flag(zram, index, ZRAM_HUGE)) |
a939888e MK |
733 | goto next; |
734 | /* | |
735 | * Clearing ZRAM_UNDER_WB is duty of caller. | |
736 | * IOW, zram_free_page never clears it. | |
737 | */ | |
738 | zram_set_flag(zram, index, ZRAM_UNDER_WB); | |
739 | /* Need for hugepage writeback racing */ | |
740 | zram_set_flag(zram, index, ZRAM_IDLE); | |
741 | zram_slot_unlock(zram, index); | |
742 | if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { | |
743 | zram_slot_lock(zram, index); | |
744 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
745 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
746 | zram_slot_unlock(zram, index); | |
747 | continue; | |
748 | } | |
749 | ||
49add496 CH |
750 | bio_init(&bio, zram->bdev, &bio_vec, 1, |
751 | REQ_OP_WRITE | REQ_SYNC); | |
a939888e | 752 | bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); |
a939888e MK |
753 | |
754 | bio_add_page(&bio, bvec.bv_page, bvec.bv_len, | |
755 | bvec.bv_offset); | |
756 | /* | |
757 | * XXX: A single page IO would be inefficient for writes, | |
758 | * but it is not bad as a starting point. | |
759 | */ | |
57e0076e MK |
760 | err = submit_bio_wait(&bio); |
761 | if (err) { | |
a939888e MK |
762 | zram_slot_lock(zram, index); |
763 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
764 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
765 | zram_slot_unlock(zram, index); | |
57e0076e MK | 766 | /* |
767 | * Remember the last IO error; it is returned if any | |
768 | * of the writes failed. | |
769 | */ | |
770 | ret = err; | |
a939888e MK |
771 | continue; |
772 | } | |
773 | ||
23eddf39 | 774 | atomic64_inc(&zram->stats.bd_writes); |
a939888e MK | 775 | /* |
776 | * We released zram_slot_lock, so we need to check whether the slot | |
777 | * has changed. If the slot was freed, we can catch it easily | |
778 | * with zram_allocated. | |
779 | * A subtler case is the slot being freed, reallocated and marked as | |
780 | * ZRAM_IDLE again. To close that race, idle_store does not mark a | |
781 | * slot ZRAM_IDLE once it finds the slot ZRAM_UNDER_WB. | |
782 | * Thus, we can close the race by checking the ZRAM_IDLE bit. | |
783 | */ | |
784 | zram_slot_lock(zram, index); | |
785 | if (!zram_allocated(zram, index) || | |
786 | !zram_test_flag(zram, index, ZRAM_IDLE)) { | |
787 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
788 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
789 | goto next; | |
790 | } | |
791 | ||
792 | zram_free_page(zram, index); | |
793 | zram_clear_flag(zram, index, ZRAM_UNDER_WB); | |
794 | zram_set_flag(zram, index, ZRAM_WB); | |
795 | zram_set_element(zram, index, blk_idx); | |
796 | blk_idx = 0; | |
797 | atomic64_inc(&zram->stats.pages_stored); | |
1d69a3f8 MK |
798 | spin_lock(&zram->wb_limit_lock); |
799 | if (zram->wb_limit_enable && zram->bd_wb_limit > 0) | |
800 | zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); | |
801 | spin_unlock(&zram->wb_limit_lock); | |
a939888e MK |
802 | next: |
803 | zram_slot_unlock(zram, index); | |
804 | } | |
805 | ||
806 | if (blk_idx) | |
807 | free_block_bdev(zram, blk_idx); | |
a939888e MK |
808 | __free_page(page); |
809 | release_init_lock: | |
810 | up_read(&zram->init_lock); | |
811 | ||
812 | return ret; | |
813 | } | |
814 | ||
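writeback_store above accepts "idle", "huge", "huge_idle" or "page_index=<n>", per the sysfs_streq()/PAGE_WB_SIG parsing at its top. Each eligible slot is read into a scratch page with zram_bvec_read(), written to the backing device with a synchronous single-page bio, and then re-pointed at the backing block (ZRAM_WB plus the block index stored in the element field); when the limit is enabled, bd_wb_limit is charged in 4K units per page written back.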
8e654f8f MK |
815 | struct zram_work { |
816 | struct work_struct work; | |
817 | struct zram *zram; | |
818 | unsigned long entry; | |
819 | struct bio *bio; | |
e153abc0 | 820 | struct bio_vec bvec; |
8e654f8f MK |
821 | }; |
822 | ||
823 | #if PAGE_SIZE != 4096 | |
824 | static void zram_sync_read(struct work_struct *work) | |
825 | { | |
8e654f8f MK |
826 | struct zram_work *zw = container_of(work, struct zram_work, work); |
827 | struct zram *zram = zw->zram; | |
828 | unsigned long entry = zw->entry; | |
829 | struct bio *bio = zw->bio; | |
830 | ||
e153abc0 | 831 | read_from_bdev_async(zram, &zw->bvec, entry, bio); |
8e654f8f MK |
832 | } |
833 | ||
834 | /* | |
c62b37d9 CH | 835 | * The block layer wants only one ->submit_bio to be active at a time, so |
836 | * chaining an IO to its parent in the same context would deadlock. To avoid | |
837 | * that, use a worker thread context. | |
8e654f8f MK | 838 | */ |
839 | static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, | |
840 | unsigned long entry, struct bio *bio) | |
841 | { | |
842 | struct zram_work work; | |
843 | ||
e153abc0 | 844 | work.bvec = *bvec; |
8e654f8f MK |
845 | work.zram = zram; |
846 | work.entry = entry; | |
847 | work.bio = bio; | |
848 | ||
849 | INIT_WORK_ONSTACK(&work.work, zram_sync_read); | |
850 | queue_work(system_unbound_wq, &work.work); | |
851 | flush_work(&work.work); | |
852 | destroy_work_on_stack(&work.work); | |
853 | ||
854 | return 1; | |
855 | } | |
856 | #else | |
857 | static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, | |
858 | unsigned long entry, struct bio *bio) | |
859 | { | |
860 | WARN_ON(1); | |
861 | return -EIO; | |
862 | } | |
863 | #endif | |
864 | ||
865 | static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, | |
866 | unsigned long entry, struct bio *parent, bool sync) | |
867 | { | |
23eddf39 | 868 | atomic64_inc(&zram->stats.bd_reads); |
8e654f8f MK |
869 | if (sync) |
870 | return read_from_bdev_sync(zram, bvec, entry, parent); | |
871 | else | |
872 | return read_from_bdev_async(zram, bvec, entry, parent); | |
873 | } | |
013bf95a | 874 | #else |
013bf95a | 875 | static inline void reset_bdev(struct zram *zram) {}; |
8e654f8f MK |
876 | static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, |
877 | unsigned long entry, struct bio *parent, bool sync) | |
878 | { | |
879 | return -EIO; | |
880 | } | |
7e529283 MK |
881 | |
882 | static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; | |
013bf95a MK |
883 | #endif |
884 | ||
c0265342 MK |
885 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING |
886 | ||
887 | static struct dentry *zram_debugfs_root; | |
888 | ||
889 | static void zram_debugfs_create(void) | |
890 | { | |
891 | zram_debugfs_root = debugfs_create_dir("zram", NULL); | |
892 | } | |
893 | ||
894 | static void zram_debugfs_destroy(void) | |
895 | { | |
896 | debugfs_remove_recursive(zram_debugfs_root); | |
897 | } | |
898 | ||
899 | static void zram_accessed(struct zram *zram, u32 index) | |
900 | { | |
e82592c4 | 901 | zram_clear_flag(zram, index, ZRAM_IDLE); |
c0265342 MK |
902 | zram->table[index].ac_time = ktime_get_boottime(); |
903 | } | |
904 | ||
c0265342 MK |
905 | static ssize_t read_block_state(struct file *file, char __user *buf, |
906 | size_t count, loff_t *ppos) | |
907 | { | |
908 | char *kbuf; | |
909 | ssize_t index, written = 0; | |
910 | struct zram *zram = file->private_data; | |
911 | unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; | |
912 | struct timespec64 ts; | |
913 | ||
914 | kbuf = kvmalloc(count, GFP_KERNEL); | |
915 | if (!kbuf) | |
916 | return -ENOMEM; | |
917 | ||
918 | down_read(&zram->init_lock); | |
919 | if (!init_done(zram)) { | |
920 | up_read(&zram->init_lock); | |
921 | kvfree(kbuf); | |
922 | return -EINVAL; | |
923 | } | |
924 | ||
925 | for (index = *ppos; index < nr_pages; index++) { | |
926 | int copied; | |
927 | ||
928 | zram_slot_lock(zram, index); | |
929 | if (!zram_allocated(zram, index)) | |
930 | goto next; | |
931 | ||
932 | ts = ktime_to_timespec64(zram->table[index].ac_time); | |
933 | copied = snprintf(kbuf + written, count, | |
e82592c4 | 934 | "%12zd %12lld.%06lu %c%c%c%c\n", |
c0265342 MK |
935 | index, (s64)ts.tv_sec, |
936 | ts.tv_nsec / NSEC_PER_USEC, | |
937 | zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', | |
938 | zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', | |
e82592c4 MK |
939 | zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', |
940 | zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); | |
c0265342 | 941 | |
a88e03cf | 942 | if (count <= copied) { |
c0265342 MK |
943 | zram_slot_unlock(zram, index); |
944 | break; | |
945 | } | |
946 | written += copied; | |
947 | count -= copied; | |
948 | next: | |
949 | zram_slot_unlock(zram, index); | |
950 | *ppos += 1; | |
951 | } | |
952 | ||
953 | up_read(&zram->init_lock); | |
954 | if (copy_to_user(buf, kbuf, written)) | |
955 | written = -EFAULT; | |
956 | kvfree(kbuf); | |
957 | ||
958 | return written; | |
959 | } | |
960 | ||
961 | static const struct file_operations proc_zram_block_state_op = { | |
962 | .open = simple_open, | |
963 | .read = read_block_state, | |
964 | .llseek = default_llseek, | |
965 | }; | |
966 | ||
967 | static void zram_debugfs_register(struct zram *zram) | |
968 | { | |
969 | if (!zram_debugfs_root) | |
970 | return; | |
971 | ||
972 | zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, | |
973 | zram_debugfs_root); | |
974 | debugfs_create_file("block_state", 0400, zram->debugfs_dir, | |
975 | zram, &proc_zram_block_state_op); | |
976 | } | |
977 | ||
978 | static void zram_debugfs_unregister(struct zram *zram) | |
979 | { | |
980 | debugfs_remove_recursive(zram->debugfs_dir); | |
981 | } | |
982 | #else | |
983 | static void zram_debugfs_create(void) {}; | |
984 | static void zram_debugfs_destroy(void) {}; | |
e82592c4 MK |
985 | static void zram_accessed(struct zram *zram, u32 index) |
986 | { | |
987 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
988 | }; | |
c0265342 MK |
989 | static void zram_debugfs_register(struct zram *zram) {}; |
990 | static void zram_debugfs_unregister(struct zram *zram) {}; | |
991 | #endif | |
013bf95a | 992 | |
43209ea2 SS |
993 | /* |
994 | * We switched to per-cpu streams and this attr is not needed anymore. | |
995 | * However, we will keep it around for some time, because: | |
996 | * a) we may revert per-cpu streams in the future | |
997 | * b) it's visible to user space and we need to follow our 2 years | |
998 | * retirement rule; but we already have a number of 'soon to be | |
999 | * altered' attrs, so max_comp_streams needs to wait for the next | |
1000 | * layoff cycle. | |
1001 | */ | |
522698d7 SS |
1002 | static ssize_t max_comp_streams_show(struct device *dev, |
1003 | struct device_attribute *attr, char *buf) | |
1004 | { | |
43209ea2 | 1005 | return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); |
522698d7 SS |
1006 | } |
1007 | ||
beca3ec7 SS |
1008 | static ssize_t max_comp_streams_store(struct device *dev, |
1009 | struct device_attribute *attr, const char *buf, size_t len) | |
1010 | { | |
43209ea2 | 1011 | return len; |
beca3ec7 SS |
1012 | } |
1013 | ||
e46b8a03 SS |
1014 | static ssize_t comp_algorithm_show(struct device *dev, |
1015 | struct device_attribute *attr, char *buf) | |
1016 | { | |
1017 | size_t sz; | |
1018 | struct zram *zram = dev_to_zram(dev); | |
1019 | ||
1020 | down_read(&zram->init_lock); | |
1021 | sz = zcomp_available_show(zram->compressor, buf); | |
1022 | up_read(&zram->init_lock); | |
1023 | ||
1024 | return sz; | |
1025 | } | |
1026 | ||
1027 | static ssize_t comp_algorithm_store(struct device *dev, | |
1028 | struct device_attribute *attr, const char *buf, size_t len) | |
1029 | { | |
1030 | struct zram *zram = dev_to_zram(dev); | |
f357e345 | 1031 | char compressor[ARRAY_SIZE(zram->compressor)]; |
4bbacd51 SS |
1032 | size_t sz; |
1033 | ||
415403be SS |
1034 | strlcpy(compressor, buf, sizeof(compressor)); |
1035 | /* ignore trailing newline */ | |
1036 | sz = strlen(compressor); | |
1037 | if (sz > 0 && compressor[sz - 1] == '\n') | |
1038 | compressor[sz - 1] = 0x00; | |
1039 | ||
1040 | if (!zcomp_available_algorithm(compressor)) | |
1d5b43bf LH |
1041 | return -EINVAL; |
1042 | ||
e46b8a03 SS |
1043 | down_write(&zram->init_lock); |
1044 | if (init_done(zram)) { | |
1045 | up_write(&zram->init_lock); | |
1046 | pr_info("Can't change algorithm for initialized device\n"); | |
1047 | return -EBUSY; | |
1048 | } | |
4bbacd51 | 1049 | |
f357e345 | 1050 | strcpy(zram->compressor, compressor); |
e46b8a03 SS |
1051 | up_write(&zram->init_lock); |
1052 | return len; | |
1053 | } | |
1054 | ||
522698d7 SS |
1055 | static ssize_t compact_store(struct device *dev, |
1056 | struct device_attribute *attr, const char *buf, size_t len) | |
306b0c95 | 1057 | { |
522698d7 | 1058 | struct zram *zram = dev_to_zram(dev); |
306b0c95 | 1059 | |
522698d7 SS |
1060 | down_read(&zram->init_lock); |
1061 | if (!init_done(zram)) { | |
1062 | up_read(&zram->init_lock); | |
1063 | return -EINVAL; | |
1064 | } | |
306b0c95 | 1065 | |
beb6602c | 1066 | zs_compact(zram->mem_pool); |
522698d7 | 1067 | up_read(&zram->init_lock); |
d2d5e762 | 1068 | |
522698d7 | 1069 | return len; |
d2d5e762 WY |
1070 | } |
1071 | ||
522698d7 SS |
1072 | static ssize_t io_stat_show(struct device *dev, |
1073 | struct device_attribute *attr, char *buf) | |
d2d5e762 | 1074 | { |
522698d7 SS |
1075 | struct zram *zram = dev_to_zram(dev); |
1076 | ssize_t ret; | |
d2d5e762 | 1077 | |
522698d7 SS |
1078 | down_read(&zram->init_lock); |
1079 | ret = scnprintf(buf, PAGE_SIZE, | |
1080 | "%8llu %8llu %8llu %8llu\n", | |
1081 | (u64)atomic64_read(&zram->stats.failed_reads), | |
1082 | (u64)atomic64_read(&zram->stats.failed_writes), | |
1083 | (u64)atomic64_read(&zram->stats.invalid_io), | |
1084 | (u64)atomic64_read(&zram->stats.notify_free)); | |
1085 | up_read(&zram->init_lock); | |
306b0c95 | 1086 | |
522698d7 | 1087 | return ret; |
9b3bb7ab SS |
1088 | } |
1089 | ||
522698d7 SS |
1090 | static ssize_t mm_stat_show(struct device *dev, |
1091 | struct device_attribute *attr, char *buf) | |
9b3bb7ab | 1092 | { |
522698d7 | 1093 | struct zram *zram = dev_to_zram(dev); |
7d3f3938 | 1094 | struct zs_pool_stats pool_stats; |
522698d7 SS |
1095 | u64 orig_size, mem_used = 0; |
1096 | long max_used; | |
1097 | ssize_t ret; | |
a539c72a | 1098 | |
7d3f3938 SS |
1099 | memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); |
1100 | ||
522698d7 | 1101 | down_read(&zram->init_lock); |
7d3f3938 | 1102 | if (init_done(zram)) { |
beb6602c MK |
1103 | mem_used = zs_get_total_pages(zram->mem_pool); |
1104 | zs_pool_stats(zram->mem_pool, &pool_stats); | |
7d3f3938 | 1105 | } |
9b3bb7ab | 1106 | |
522698d7 SS |
1107 | orig_size = atomic64_read(&zram->stats.pages_stored); |
1108 | max_used = atomic_long_read(&zram->stats.max_used_pages); | |
9b3bb7ab | 1109 | |
522698d7 | 1110 | ret = scnprintf(buf, PAGE_SIZE, |
194e28da | 1111 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", |
522698d7 SS |
1112 | orig_size << PAGE_SHIFT, |
1113 | (u64)atomic64_read(&zram->stats.compr_data_size), | |
1114 | mem_used << PAGE_SHIFT, | |
1115 | zram->limit_pages << PAGE_SHIFT, | |
1116 | max_used << PAGE_SHIFT, | |
8e19d540 | 1117 | (u64)atomic64_read(&zram->stats.same_pages), |
23959281 | 1118 | atomic_long_read(&pool_stats.pages_compacted), |
194e28da MK |
1119 | (u64)atomic64_read(&zram->stats.huge_pages), |
1120 | (u64)atomic64_read(&zram->stats.huge_pages_since)); | |
522698d7 | 1121 | up_read(&zram->init_lock); |
9b3bb7ab | 1122 | |
522698d7 SS |
1123 | return ret; |
1124 | } | |
1125 | ||
23eddf39 | 1126 | #ifdef CONFIG_ZRAM_WRITEBACK |
bb416d18 | 1127 | #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) |
23eddf39 MK |
1128 | static ssize_t bd_stat_show(struct device *dev, |
1129 | struct device_attribute *attr, char *buf) | |
1130 | { | |
1131 | struct zram *zram = dev_to_zram(dev); | |
1132 | ssize_t ret; | |
1133 | ||
1134 | down_read(&zram->init_lock); | |
1135 | ret = scnprintf(buf, PAGE_SIZE, | |
1136 | "%8llu %8llu %8llu\n", | |
bb416d18 MK |
1137 | FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), |
1138 | FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), | |
1139 | FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); | |
23eddf39 MK |
1140 | up_read(&zram->init_lock); |
1141 | ||
1142 | return ret; | |
1143 | } | |
1144 | #endif | |
1145 | ||
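The bd_count/bd_reads/bd_writes values above are reported through the FOUR_K() macro, i.e. scaled to 4K units regardless of PAGE_SIZE, which matches the 1UL << (PAGE_SHIFT - 12) granularity used when bd_wb_limit is charged in writeback_store.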
623e47fc SS |
1146 | static ssize_t debug_stat_show(struct device *dev, |
1147 | struct device_attribute *attr, char *buf) | |
1148 | { | |
37887783 | 1149 | int version = 1; |
623e47fc SS |
1150 | struct zram *zram = dev_to_zram(dev); |
1151 | ssize_t ret; | |
1152 | ||
1153 | down_read(&zram->init_lock); | |
1154 | ret = scnprintf(buf, PAGE_SIZE, | |
37887783 | 1155 | "version: %d\n%8llu %8llu\n", |
623e47fc | 1156 | version, |
37887783 | 1157 | (u64)atomic64_read(&zram->stats.writestall), |
3c9959e0 | 1158 | (u64)atomic64_read(&zram->stats.miss_free)); |
623e47fc SS |
1159 | up_read(&zram->init_lock); |
1160 | ||
1161 | return ret; | |
1162 | } | |
1163 | ||
522698d7 SS |
1164 | static DEVICE_ATTR_RO(io_stat); |
1165 | static DEVICE_ATTR_RO(mm_stat); | |
23eddf39 MK |
1166 | #ifdef CONFIG_ZRAM_WRITEBACK |
1167 | static DEVICE_ATTR_RO(bd_stat); | |
1168 | #endif | |
623e47fc | 1169 | static DEVICE_ATTR_RO(debug_stat); |
522698d7 | 1170 | |
beb6602c | 1171 | static void zram_meta_free(struct zram *zram, u64 disksize) |
522698d7 SS |
1172 | { |
1173 | size_t num_pages = disksize >> PAGE_SHIFT; | |
1174 | size_t index; | |
1fec1172 GM |
1175 | |
1176 | /* Free all pages that are still in this zram device */ | |
302128dc MK |
1177 | for (index = 0; index < num_pages; index++) |
1178 | zram_free_page(zram, index); | |
1fec1172 | 1179 | |
beb6602c MK |
1180 | zs_destroy_pool(zram->mem_pool); |
1181 | vfree(zram->table); | |
9b3bb7ab SS |
1182 | } |
1183 | ||
beb6602c | 1184 | static bool zram_meta_alloc(struct zram *zram, u64 disksize) |
9b3bb7ab SS |
1185 | { |
1186 | size_t num_pages; | |
9b3bb7ab | 1187 | |
9b3bb7ab | 1188 | num_pages = disksize >> PAGE_SHIFT; |
fad953ce | 1189 | zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); |
beb6602c MK |
1190 | if (!zram->table) |
1191 | return false; | |
9b3bb7ab | 1192 | |
beb6602c MK |
1193 | zram->mem_pool = zs_create_pool(zram->disk->disk_name); |
1194 | if (!zram->mem_pool) { | |
1195 | vfree(zram->table); | |
1196 | return false; | |
9b3bb7ab SS |
1197 | } |
1198 | ||
60f5921a SS |
1199 | if (!huge_class_size) |
1200 | huge_class_size = zs_huge_class_size(zram->mem_pool); | |
beb6602c | 1201 | return true; |
9b3bb7ab SS |
1202 | } |
1203 | ||
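zram_meta_alloc() caches huge_class_size from zsmalloc the first time a device is initialized; __zram_bvec_write() below uses that threshold to treat any page whose compressed length reaches it as incompressible, storing it as a full PAGE_SIZE object and flagging it ZRAM_HUGE.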
d2d5e762 WY |
1204 | /* |
1205 | * To protect concurrent access to the same index entry, | |
1206 | * caller should hold this table index entry's bit_spinlock to | |
1207 | * indicate this index entry is accessing. | |
1208 | */ | |
f1e3cfff | 1209 | static void zram_free_page(struct zram *zram, size_t index) |
306b0c95 | 1210 | { |
db8ffbd4 MK |
1211 | unsigned long handle; |
1212 | ||
7e529283 MK |
1213 | #ifdef CONFIG_ZRAM_MEMORY_TRACKING |
1214 | zram->table[index].ac_time = 0; | |
1215 | #endif | |
e82592c4 MK |
1216 | if (zram_test_flag(zram, index, ZRAM_IDLE)) |
1217 | zram_clear_flag(zram, index, ZRAM_IDLE); | |
1218 | ||
89e85bce MK |
1219 | if (zram_test_flag(zram, index, ZRAM_HUGE)) { |
1220 | zram_clear_flag(zram, index, ZRAM_HUGE); | |
1221 | atomic64_dec(&zram->stats.huge_pages); | |
1222 | } | |
1223 | ||
7e529283 MK |
1224 | if (zram_test_flag(zram, index, ZRAM_WB)) { |
1225 | zram_clear_flag(zram, index, ZRAM_WB); | |
1226 | free_block_bdev(zram, zram_get_element(zram, index)); | |
1227 | goto out; | |
db8ffbd4 | 1228 | } |
306b0c95 | 1229 | |
8e19d540 | 1230 | /* |
1231 | * No memory is allocated for same element filled pages. | |
1232 | * Simply clear same page flag. | |
1233 | */ | |
beb6602c MK |
1234 | if (zram_test_flag(zram, index, ZRAM_SAME)) { |
1235 | zram_clear_flag(zram, index, ZRAM_SAME); | |
8e19d540 | 1236 | atomic64_dec(&zram->stats.same_pages); |
7e529283 | 1237 | goto out; |
306b0c95 NG |
1238 | } |
1239 | ||
db8ffbd4 | 1240 | handle = zram_get_handle(zram, index); |
8e19d540 | 1241 | if (!handle) |
1242 | return; | |
1243 | ||
beb6602c | 1244 | zs_free(zram->mem_pool, handle); |
306b0c95 | 1245 | |
beb6602c | 1246 | atomic64_sub(zram_get_obj_size(zram, index), |
d2d5e762 | 1247 | &zram->stats.compr_data_size); |
7e529283 | 1248 | out: |
90a7806e | 1249 | atomic64_dec(&zram->stats.pages_stored); |
643ae61d | 1250 | zram_set_handle(zram, index, 0); |
beb6602c | 1251 | zram_set_obj_size(zram, index, 0); |
a939888e MK |
1252 | WARN_ON_ONCE(zram->table[index].flags & |
1253 | ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); | |
306b0c95 NG |
1254 | } |
1255 | ||
8e654f8f MK |
1256 | static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, |
1257 | struct bio *bio, bool partial_io) | |
306b0c95 | 1258 | { |
0669d2b2 | 1259 | struct zcomp_strm *zstrm; |
92967471 | 1260 | unsigned long handle; |
ebaf9ab5 | 1261 | unsigned int size; |
1f7319c7 | 1262 | void *src, *dst; |
0669d2b2 | 1263 | int ret; |
1f7319c7 | 1264 | |
7e529283 MK |
1265 | zram_slot_lock(zram, index); |
1266 | if (zram_test_flag(zram, index, ZRAM_WB)) { | |
1267 | struct bio_vec bvec; | |
8e654f8f | 1268 | |
8e654f8f | 1269 | zram_slot_unlock(zram, index); |
7e529283 MK |
1270 | |
1271 | bvec.bv_page = page; | |
1272 | bvec.bv_len = PAGE_SIZE; | |
1273 | bvec.bv_offset = 0; | |
1274 | return read_from_bdev(zram, &bvec, | |
1275 | zram_get_element(zram, index), | |
1276 | bio, partial_io); | |
8e654f8f MK |
1277 | } |
1278 | ||
643ae61d | 1279 | handle = zram_get_handle(zram, index); |
ae94264e MK |
1280 | if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { |
1281 | unsigned long value; | |
1282 | void *mem; | |
1283 | ||
1284 | value = handle ? zram_get_element(zram, index) : 0; | |
1285 | mem = kmap_atomic(page); | |
1286 | zram_fill_page(mem, PAGE_SIZE, value); | |
1287 | kunmap_atomic(mem); | |
1288 | zram_slot_unlock(zram, index); | |
1289 | return 0; | |
1290 | } | |
1291 | ||
beb6602c | 1292 | size = zram_get_obj_size(zram, index); |
306b0c95 | 1293 | |
0669d2b2 PZ |
1294 | if (size != PAGE_SIZE) |
1295 | zstrm = zcomp_stream_get(zram->comp); | |
1296 | ||
beb6602c | 1297 | src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); |
ebaf9ab5 | 1298 | if (size == PAGE_SIZE) { |
1f7319c7 MK |
1299 | dst = kmap_atomic(page); |
1300 | memcpy(dst, src, PAGE_SIZE); | |
1301 | kunmap_atomic(dst); | |
1302 | ret = 0; | |
ebaf9ab5 | 1303 | } else { |
1f7319c7 MK |
1304 | dst = kmap_atomic(page); |
1305 | ret = zcomp_decompress(zstrm, src, size, dst); | |
1306 | kunmap_atomic(dst); | |
ebaf9ab5 SS |
1307 | zcomp_stream_put(zram->comp); |
1308 | } | |
beb6602c | 1309 | zs_unmap_object(zram->mem_pool, handle); |
86c49814 | 1310 | zram_slot_unlock(zram, index); |
a1dd52af | 1311 | |
8c921b2b | 1312 | /* Should NEVER happen. Return bio error if it does. */ |
4e79603b | 1313 | if (WARN_ON(ret)) |
8c921b2b | 1314 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); |
306b0c95 | 1315 | |
1f7319c7 | 1316 | return ret; |
306b0c95 NG |
1317 | } |
1318 | ||
37b51fdd | 1319 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
8e654f8f | 1320 | u32 index, int offset, struct bio *bio) |
924bd88d JM |
1321 | { |
1322 | int ret; | |
37b51fdd | 1323 | struct page *page; |
37b51fdd | 1324 | |
1f7319c7 MK |
1325 | page = bvec->bv_page; |
1326 | if (is_partial_io(bvec)) { | |
1327 | /* Use a temporary buffer to decompress the page */ | |
1328 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
1329 | if (!page) | |
1330 | return -ENOMEM; | |
924bd88d JM |
1331 | } |
1332 | ||
8e654f8f | 1333 | ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); |
1f7319c7 MK |
1334 | if (unlikely(ret)) |
1335 | goto out; | |
7e5a5104 | 1336 | |
1f7319c7 | 1337 | if (is_partial_io(bvec)) { |
1f7319c7 | 1338 | void *src = kmap_atomic(page); |
37b51fdd | 1339 | |
b3bd0a8a | 1340 | memcpy_to_bvec(bvec, src + offset); |
1f7319c7 | 1341 | kunmap_atomic(src); |
37b51fdd | 1342 | } |
1f7319c7 | 1343 | out: |
37b51fdd | 1344 | if (is_partial_io(bvec)) |
1f7319c7 | 1345 | __free_page(page); |
37b51fdd | 1346 | |
37b51fdd | 1347 | return ret; |
924bd88d JM |
1348 | } |
1349 | ||
db8ffbd4 MK |
1350 | static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, |
1351 | u32 index, struct bio *bio) | |
306b0c95 | 1352 | { |
ae85a807 | 1353 | int ret = 0; |
1f7319c7 | 1354 | unsigned long alloced_pages; |
37887783 | 1355 | unsigned long handle = -ENOMEM; |
97ec7c8b MK |
1356 | unsigned int comp_len = 0; |
1357 | void *src, *dst, *mem; | |
1358 | struct zcomp_strm *zstrm; | |
1359 | struct page *page = bvec->bv_page; | |
1360 | unsigned long element = 0; | |
1361 | enum zram_pageflags flags = 0; | |
1362 | ||
1363 | mem = kmap_atomic(page); | |
1364 | if (page_same_filled(mem, &element)) { | |
1365 | kunmap_atomic(mem); | |
1366 | /* Free memory associated with this sector now. */ | |
1367 | flags = ZRAM_SAME; | |
1368 | atomic64_inc(&zram->stats.same_pages); | |
1369 | goto out; | |
1370 | } | |
1371 | kunmap_atomic(mem); | |
924bd88d | 1372 | |
37887783 | 1373 | compress_again: |
97ec7c8b | 1374 | zstrm = zcomp_stream_get(zram->comp); |
1f7319c7 | 1375 | src = kmap_atomic(page); |
97ec7c8b | 1376 | ret = zcomp_compress(zstrm, src, &comp_len); |
1f7319c7 | 1377 | kunmap_atomic(src); |
306b0c95 | 1378 | |
b7ca232e | 1379 | if (unlikely(ret)) { |
97ec7c8b | 1380 | zcomp_stream_put(zram->comp); |
8c921b2b | 1381 | pr_err("Compression failed! err=%d\n", ret); |
37887783 | 1382 | zs_free(zram->mem_pool, handle); |
1f7319c7 | 1383 | return ret; |
8c921b2b | 1384 | } |
da9556a2 | 1385 | |
a939888e | 1386 | if (comp_len >= huge_class_size) |
89e85bce | 1387 | comp_len = PAGE_SIZE; |
37887783 JS |
1388 | /* |
1389 | * handle allocation has 2 paths: | |
1390 | * a) fast path is executed with preemption disabled (for | |
1391 | * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, | |
1392 | * since we can't sleep; | |
1393 | * b) slow path enables preemption and attempts to allocate | |
1394 | * the page with __GFP_DIRECT_RECLAIM bit set. we have to | |
1395 | * put per-cpu compression stream and, thus, to re-do | |
1396 | * the compression once handle is allocated. | |
1397 | * | |
1398 | * if we have a 'non-null' handle here then we are coming | |
1399 | * from the slow path and handle has already been allocated. | |
1400 | */ | |
1401 | if (IS_ERR((void *)handle)) | |
1402 | handle = zs_malloc(zram->mem_pool, comp_len, | |
1403 | __GFP_KSWAPD_RECLAIM | | |
1404 | __GFP_NOWARN | | |
1405 | __GFP_HIGHMEM | | |
1406 | __GFP_MOVABLE); | |
c7e6f17b | 1407 | if (IS_ERR((void *)handle)) { |
2aea8493 | 1408 | zcomp_stream_put(zram->comp); |
37887783 JS |
1409 | atomic64_inc(&zram->stats.writestall); |
1410 | handle = zs_malloc(zram->mem_pool, comp_len, | |
1411 | GFP_NOIO | __GFP_HIGHMEM | | |
1412 | __GFP_MOVABLE); | |
641608f3 AR |
1413 | if (IS_ERR((void *)handle)) |
1414 | return PTR_ERR((void *)handle); | |
1415 | ||
1416 | if (comp_len != PAGE_SIZE) | |
37887783 | 1417 | goto compress_again; |
641608f3 AR | 1418 | /* |
1419 | * If the page is not compressible, we still need to acquire the stream and | |
1420 | * execute the code below. The zcomp_stream_get() call is needed to | |
1421 | * disable cpu hotplug and to grab the zstrm buffer back. | |
1422 | * This ensures that the dereference of the zstrm variable below | |
1423 | * remains valid. | |
1424 | */ | |
1425 | zstrm = zcomp_stream_get(zram->comp); | |
8c921b2b | 1426 | } |
9ada9da9 | 1427 | |
beb6602c | 1428 | alloced_pages = zs_get_total_pages(zram->mem_pool); |
12372755 SS |
1429 | update_used_max(zram, alloced_pages); |
1430 | ||
461a8eee | 1431 | if (zram->limit_pages && alloced_pages > zram->limit_pages) { |
97ec7c8b | 1432 | zcomp_stream_put(zram->comp); |
beb6602c | 1433 | zs_free(zram->mem_pool, handle); |
1f7319c7 MK |
1434 | return -ENOMEM; |
1435 | } | |
1436 | ||
beb6602c | 1437 | dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); |
1f7319c7 MK |
1438 | |
1439 | src = zstrm->buffer; | |
1440 | if (comp_len == PAGE_SIZE) | |
397c6066 | 1441 | src = kmap_atomic(page); |
1f7319c7 MK |
1442 | memcpy(dst, src, comp_len); |
1443 | if (comp_len == PAGE_SIZE) | |
397c6066 | 1444 | kunmap_atomic(src); |
306b0c95 | 1445 | |
2aea8493 | 1446 | zcomp_stream_put(zram->comp); |
beb6602c | 1447 | zs_unmap_object(zram->mem_pool, handle); |
4ebbe7f7 MK |
1448 | atomic64_add(comp_len, &zram->stats.compr_data_size); |
1449 | out: | |
f40ac2ae SS |
1450 | /* |
1451 | * Free memory associated with this sector | |
1452 | * before overwriting unused sectors. | |
1453 | */ | |
86c49814 | 1454 | zram_slot_lock(zram, index); |
f40ac2ae | 1455 | zram_free_page(zram, index); |
db8ffbd4 | 1456 | |
89e85bce MK |
1457 | if (comp_len == PAGE_SIZE) { |
1458 | zram_set_flag(zram, index, ZRAM_HUGE); | |
1459 | atomic64_inc(&zram->stats.huge_pages); | |
194e28da | 1460 | atomic64_inc(&zram->stats.huge_pages_since); |
89e85bce MK |
1461 | } |
1462 | ||
db8ffbd4 MK |
1463 | if (flags) { |
1464 | zram_set_flag(zram, index, flags); | |
4ebbe7f7 | 1465 | zram_set_element(zram, index, element); |
db8ffbd4 | 1466 | } else { |
4ebbe7f7 MK |
1467 | zram_set_handle(zram, index, handle); |
1468 | zram_set_obj_size(zram, index, comp_len); | |
1469 | } | |
86c49814 | 1470 | zram_slot_unlock(zram, index); |
306b0c95 | 1471 | |
8c921b2b | 1472 | /* Update stats */ |
90a7806e | 1473 | atomic64_inc(&zram->stats.pages_stored); |
ae85a807 | 1474 | return ret; |
1f7319c7 MK |
1475 | } |
1476 | ||
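Handle allocation in __zram_bvec_write() above is two-phase, as the compress_again comment explains: a first zs_malloc() without __GFP_DIRECT_RECLAIM while the per-cpu stream is held and, if that fails, a second attempt with GFP_NOIO after zcomp_stream_put(); because the stream buffer is given up in between, the page has to be recompressed once the handle finally exists.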
1477 | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, | |
db8ffbd4 | 1478 | u32 index, int offset, struct bio *bio) |
1f7319c7 MK |
1479 | { |
1480 | int ret; | |
1481 | struct page *page = NULL; | |
1f7319c7 MK |
1482 | struct bio_vec vec; |
1483 | ||
1484 | vec = *bvec; | |
1485 | if (is_partial_io(bvec)) { | |
1486 | void *dst; | |
1487 | /* | |
1488 | * This is a partial IO. We need to read the full page | |
1489 | * before writing the changes. | |
1490 | */ | |
1491 | page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); | |
1492 | if (!page) | |
1493 | return -ENOMEM; | |
1494 | ||
8e654f8f | 1495 | ret = __zram_bvec_read(zram, page, index, bio, true); |
1f7319c7 MK |
1496 | if (ret) |
1497 | goto out; | |
1498 | ||
1f7319c7 | 1499 | dst = kmap_atomic(page); |
bd3d3203 | 1500 | memcpy_from_bvec(dst + offset, bvec); |
1f7319c7 | 1501 | kunmap_atomic(dst); |
1f7319c7 MK |
1502 | |
1503 | vec.bv_page = page; | |
1504 | vec.bv_len = PAGE_SIZE; | |
1505 | vec.bv_offset = 0; | |
1506 | } | |
1507 | ||
db8ffbd4 | 1508 | ret = __zram_bvec_write(zram, &vec, index, bio); |
924bd88d | 1509 | out: |
397c6066 | 1510 | if (is_partial_io(bvec)) |
1f7319c7 | 1511 | __free_page(page); |
924bd88d | 1512 | return ret; |
8c921b2b JM |
1513 | } |
1514 | ||
f4659d8e JK |
1515 | /* |
1516 | * zram_bio_discard - handler on discard request | |
1517 | * @index: physical block index in PAGE_SIZE units | |
1518 | * @offset: byte offset within physical block | |
1519 | */ | |
1520 | static void zram_bio_discard(struct zram *zram, u32 index, | |
1521 | int offset, struct bio *bio) | |
1522 | { | |
1523 | size_t n = bio->bi_iter.bi_size; | |
1524 | ||
1525 | /* | |
1526 | * zram manages data in physical block size units. Because logical block | |
1527 | * size isn't identical to the physical block size on some arches, we | |
1528 | * could get a discard request pointing to a specific offset within a | |
1529 | * certain physical block. Although we can handle this request by | |
1530 | * reading that physical block and decompressing and partially zeroing | |
1531 | * and re-compressing and then re-storing it, this isn't reasonable | |
1532 | * because our intent with a discard request is to save memory. So | |
1533 | * skipping this logical block is appropriate here. | |
1534 | */ | |
1535 | if (offset) { | |
38515c73 | 1536 | if (n <= (PAGE_SIZE - offset)) |
f4659d8e JK |
1537 | return; |
1538 | ||
38515c73 | 1539 | n -= (PAGE_SIZE - offset); |
f4659d8e JK |
1540 | index++; |
1541 | } | |
1542 | ||
1543 | while (n >= PAGE_SIZE) { | |
86c49814 | 1544 | zram_slot_lock(zram, index); |
f4659d8e | 1545 | zram_free_page(zram, index); |
86c49814 | 1546 | zram_slot_unlock(zram, index); |
015254da | 1547 | atomic64_inc(&zram->stats.notify_free); |
f4659d8e JK |
1548 | index++; |
1549 | n -= PAGE_SIZE; | |
1550 | } | |
1551 | } | |
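/*
 * Editor's note (hedged worked example, not from the original source): with
 * PAGE_SIZE == 4096, a discard of n = 10240 bytes at offset = 1024 within
 * page `index` is trimmed as follows:
 *
 *   head: 4096 - 1024 = 3072 bytes are skipped; n becomes 7168, index++.
 *   body: one full page (4096 bytes) is freed; n becomes 3072, index++.
 *   tail: the remaining 3072 bytes (< PAGE_SIZE) are left untouched.
 *
 * Only whole pages are actually freed, matching the rationale above.
 */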
1552 | ||
ae85a807 MK |
1553 | /* |
1554 | * Returns a negative errno if it runs into a problem. Otherwise returns 0 or 1. | |
1555 | * Returns 0 if the IO request was completed synchronously. | |
1556 | * Returns 1 if the IO request was successfully submitted. | |
1557 | */ | |
522698d7 | 1558 | static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, |
bc0421ea | 1559 | int offset, enum req_op op, struct bio *bio) |
9b3bb7ab SS |
1560 | { |
1561 | int ret; | |
9b3bb7ab | 1562 | |
3f289dcb | 1563 | if (!op_is_write(op)) { |
522698d7 | 1564 | atomic64_inc(&zram->stats.num_reads); |
8e654f8f | 1565 | ret = zram_bvec_read(zram, bvec, index, offset, bio); |
1f7319c7 | 1566 | flush_dcache_page(bvec->bv_page); |
522698d7 SS |
1567 | } else { |
1568 | atomic64_inc(&zram->stats.num_writes); | |
db8ffbd4 | 1569 | ret = zram_bvec_write(zram, bvec, index, offset, bio); |
1b672224 | 1570 | } |
9b3bb7ab | 1571 | |
d7eac6b6 MK |
1572 | zram_slot_lock(zram, index); |
1573 | zram_accessed(zram, index); | |
1574 | zram_slot_unlock(zram, index); | |
1575 | ||
ae85a807 | 1576 | if (unlikely(ret < 0)) { |
3f289dcb | 1577 | if (!op_is_write(op)) |
522698d7 SS |
1578 | atomic64_inc(&zram->stats.failed_reads); |
1579 | else | |
1580 | atomic64_inc(&zram->stats.failed_writes); | |
1b672224 | 1581 | } |
9b3bb7ab | 1582 | |
1b672224 | 1583 | return ret; |
8c921b2b JM |
1584 | } |
1585 | ||
be257c61 | 1586 | static void __zram_make_request(struct zram *zram, struct bio *bio) |
8c921b2b | 1587 | { |
abf54548 | 1588 | int offset; |
8c921b2b | 1589 | u32 index; |
7988613b KO |
1590 | struct bio_vec bvec; |
1591 | struct bvec_iter iter; | |
d7614e44 | 1592 | unsigned long start_time; |
8c921b2b | 1593 | |
4f024f37 KO |
1594 | index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; |
1595 | offset = (bio->bi_iter.bi_sector & | |
1596 | (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; | |
8c921b2b | 1597 | |
31edeacd CH |
1598 | switch (bio_op(bio)) { |
1599 | case REQ_OP_DISCARD: | |
1600 | case REQ_OP_WRITE_ZEROES: | |
f4659d8e | 1601 | zram_bio_discard(zram, index, offset, bio); |
4246a0b6 | 1602 | bio_endio(bio); |
f4659d8e | 1603 | return; |
31edeacd CH |
1604 | default: |
1605 | break; | |
f4659d8e JK |
1606 | } |
1607 | ||
d7614e44 | 1608 | start_time = bio_start_io_acct(bio); |
7988613b | 1609 | bio_for_each_segment(bvec, bio, iter) { |
e86942c7 MK |
1610 | struct bio_vec bv = bvec; |
1611 | unsigned int unwritten = bvec.bv_len; | |
924bd88d | 1612 | |
e86942c7 MK |
1613 | do { |
1614 | bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, | |
1615 | unwritten); | |
abf54548 | 1616 | if (zram_bvec_rw(zram, &bv, index, offset, |
d7614e44 CH |
1617 | bio_op(bio), bio) < 0) { |
1618 | bio->bi_status = BLK_STS_IOERR; | |
1619 | break; | |
1620 | } | |
924bd88d | 1621 | |
e86942c7 MK |
1622 | bv.bv_offset += bv.bv_len; |
1623 | unwritten -= bv.bv_len; | |
924bd88d | 1624 | |
e86942c7 MK |
1625 | update_position(&index, &offset, &bv); |
1626 | } while (unwritten); | |
a1dd52af | 1627 | } |
d7614e44 | 1628 | bio_end_io_acct(bio, start_time); |
4246a0b6 | 1629 | bio_endio(bio); |
306b0c95 NG |
1630 | } |
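/*
 * Editor's note (hedged worked example, assuming 4KiB pages and 512-byte
 * sectors, i.e. SECTORS_PER_PAGE == 8 and SECTOR_SHIFT == 9): a bio with
 * bi_sector == 19 maps to
 *
 *   index  = 19 >> 3       = 2             (third 4KiB page)
 *   offset = (19 & 7) << 9 = 3 << 9 = 1536 (byte offset within that page)
 *
 * so its first segment is handled as a partial access to page 2.
 */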
1631 | ||
306b0c95 | 1632 | /* |
f1e3cfff | 1633 | * Handler function for all zram I/O requests. |
306b0c95 | 1634 | */ |
3e08773c | 1635 | static void zram_submit_bio(struct bio *bio) |
306b0c95 | 1636 | { |
309dca30 | 1637 | struct zram *zram = bio->bi_bdev->bd_disk->private_data; |
306b0c95 | 1638 | |
54850e73 | 1639 | if (!valid_io_request(zram, bio->bi_iter.bi_sector, |
1640 | bio->bi_iter.bi_size)) { | |
da5cc7d3 | 1641 | atomic64_inc(&zram->stats.invalid_io); |
3e08773c CH |
1642 | bio_io_error(bio); |
1643 | return; | |
6642a67c JM |
1644 | } |
1645 | ||
be257c61 | 1646 | __zram_make_request(zram, bio); |
306b0c95 NG |
1647 | } |
1648 | ||
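/*
 * Editor's note (descriptive comment, not in the original source): the swap
 * notify handler below only uses zram_slot_trylock(); if the slot is busy
 * with concurrent I/O, the free is skipped and accounted in stats.miss_free
 * instead of waiting on the slot lock in that context.
 */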
2ccbec05 NG |
1649 | static void zram_slot_free_notify(struct block_device *bdev, |
1650 | unsigned long index) | |
107c161b | 1651 | { |
f1e3cfff | 1652 | struct zram *zram; |
107c161b | 1653 | |
f1e3cfff | 1654 | zram = bdev->bd_disk->private_data; |
a0c516cb | 1655 | |
3c9959e0 MK |
1656 | atomic64_inc(&zram->stats.notify_free); |
1657 | if (!zram_slot_trylock(zram, index)) { | |
1658 | atomic64_inc(&zram->stats.miss_free); | |
1659 | return; | |
1660 | } | |
1661 | ||
f614a9f4 | 1662 | zram_free_page(zram, index); |
86c49814 | 1663 | zram_slot_unlock(zram, index); |
107c161b NG |
1664 | } |
1665 | ||
8c7f0102 | 1666 | static int zram_rw_page(struct block_device *bdev, sector_t sector, |
86947df3 | 1667 | struct page *page, enum req_op op) |
8c7f0102 | 1668 | { |
ae85a807 | 1669 | int offset, ret; |
8c7f0102 | 1670 | u32 index; |
1671 | struct zram *zram; | |
1672 | struct bio_vec bv; | |
d7614e44 | 1673 | unsigned long start_time; |
8c7f0102 | 1674 | |
98cc093c HY |
1675 | if (PageTransHuge(page)) |
1676 | return -ENOTSUPP; | |
8c7f0102 | 1677 | zram = bdev->bd_disk->private_data; |
08eee69f | 1678 | |
8c7f0102 | 1679 | if (!valid_io_request(zram, sector, PAGE_SIZE)) { |
1680 | atomic64_inc(&zram->stats.invalid_io); | |
ae85a807 | 1681 | ret = -EINVAL; |
a09759ac | 1682 | goto out; |
8c7f0102 | 1683 | } |
1684 | ||
1685 | index = sector >> SECTORS_PER_PAGE_SHIFT; | |
4ca82dab | 1686 | offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; |
8c7f0102 | 1687 | |
1688 | bv.bv_page = page; | |
1689 | bv.bv_len = PAGE_SIZE; | |
1690 | bv.bv_offset = 0; | |
1691 | ||
5f0614a5 ML |
1692 | start_time = bdev_start_io_acct(bdev->bd_disk->part0, |
1693 | SECTORS_PER_PAGE, op, jiffies); | |
3f289dcb | 1694 | ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); |
5f0614a5 | 1695 | bdev_end_io_acct(bdev->bd_disk->part0, op, start_time); |
08eee69f | 1696 | out: |
8c7f0102 | 1697 | /* |
1698 | * If the I/O fails, just return the error (i.e. non-zero) without | |
1699 | * calling page_endio. | |
1700 | * This makes the upper layers of rw_page (e.g., swap_readpage, | |
1701 | * __swap_writepage) resubmit the I/O as a bio request, and | |
1702 | * bio->bi_end_io then handles the error | |
1703 | * (e.g., SetPageError, set_page_dirty and extra work). | |
1704 | */ | |
ae85a807 MK |
1705 | if (unlikely(ret < 0)) |
1706 | return ret; | |
1707 | ||
1708 | switch (ret) { | |
1709 | case 0: | |
3f289dcb | 1710 | page_endio(page, op_is_write(op), 0); |
ae85a807 MK |
1711 | break; |
1712 | case 1: | |
1713 | ret = 0; | |
1714 | break; | |
1715 | default: | |
1716 | WARN_ON(1); | |
1717 | } | |
1718 | return ret; | |
8c7f0102 | 1719 | } |
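/*
 * Editor's note (hedged sketch of the calling convention, illustrative name
 * only): a rw_page caller is expected to treat the result roughly as
 *
 *   ret = zram_rw_page(bdev, sector, page, op);
 *   if (ret < 0)
 *           fall_back_to_bio(page);  // placeholder for the caller's bio path
 *   // ret == 0: the page was completed here (page_endio already called for
 *   // the synchronous case; an async submit reports 0 and completes later).
 *
 * `fall_back_to_bio` is a stand-in for what swap_readpage and
 * __swap_writepage actually do on failure.
 */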
1720 | ||
522698d7 SS |
1721 | static void zram_reset_device(struct zram *zram) |
1722 | { | |
522698d7 | 1723 | down_write(&zram->init_lock); |
9b3bb7ab | 1724 | |
522698d7 SS |
1725 | zram->limit_pages = 0; |
1726 | ||
1727 | if (!init_done(zram)) { | |
1728 | up_write(&zram->init_lock); | |
1729 | return; | |
1730 | } | |
1731 | ||
6e017a39 | 1732 | set_capacity_and_notify(zram->disk, 0); |
8446fe92 | 1733 | part_stat_set_all(zram->disk->part0, 0); |
522698d7 | 1734 | |
522698d7 | 1735 | /* I/O operations on all CPUs are done, so let's free */ |
6d2453c3 SS |
1736 | zram_meta_free(zram, zram->disksize); |
1737 | zram->disksize = 0; | |
302128dc | 1738 | memset(&zram->stats, 0, sizeof(zram->stats)); |
6d2453c3 SS |
1739 | zcomp_destroy(zram->comp); |
1740 | zram->comp = NULL; | |
013bf95a | 1741 | reset_bdev(zram); |
6f163779 ML |
1742 | |
1743 | up_write(&zram->init_lock); | |
522698d7 SS |
1744 | } |
1745 | ||
1746 | static ssize_t disksize_store(struct device *dev, | |
1747 | struct device_attribute *attr, const char *buf, size_t len) | |
2f6a3bed | 1748 | { |
522698d7 SS |
1749 | u64 disksize; |
1750 | struct zcomp *comp; | |
2f6a3bed | 1751 | struct zram *zram = dev_to_zram(dev); |
522698d7 | 1752 | int err; |
2f6a3bed | 1753 | |
522698d7 SS |
1754 | disksize = memparse(buf, NULL); |
1755 | if (!disksize) | |
1756 | return -EINVAL; | |
2f6a3bed | 1757 | |
beb6602c MK |
1758 | down_write(&zram->init_lock); |
1759 | if (init_done(zram)) { | |
1760 | pr_info("Cannot change disksize for initialized device\n"); | |
1761 | err = -EBUSY; | |
1762 | goto out_unlock; | |
1763 | } | |
1764 | ||
522698d7 | 1765 | disksize = PAGE_ALIGN(disksize); |
beb6602c MK |
1766 | if (!zram_meta_alloc(zram, disksize)) { |
1767 | err = -ENOMEM; | |
1768 | goto out_unlock; | |
1769 | } | |
522698d7 | 1770 | |
da9556a2 | 1771 | comp = zcomp_create(zram->compressor); |
522698d7 | 1772 | if (IS_ERR(comp)) { |
70864969 | 1773 | pr_err("Cannot initialise %s compressing backend\n", |
522698d7 SS |
1774 | zram->compressor); |
1775 | err = PTR_ERR(comp); | |
1776 | goto out_free_meta; | |
1777 | } | |
1778 | ||
522698d7 SS |
1779 | zram->comp = comp; |
1780 | zram->disksize = disksize; | |
6e017a39 | 1781 | set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); |
e7ccfc4c | 1782 | up_write(&zram->init_lock); |
522698d7 SS |
1783 | |
1784 | return len; | |
1785 | ||
522698d7 | 1786 | out_free_meta: |
beb6602c MK |
1787 | zram_meta_free(zram, disksize); |
1788 | out_unlock: | |
1789 | up_write(&zram->init_lock); | |
522698d7 | 1790 | return err; |
2f6a3bed SS |
1791 | } |
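/*
 * Editor's note (usage sketch, hedged): disksize accepts memparse() suffixes
 * and must be set before the device is used, e.g. from a shell:
 *
 *   echo 1G > /sys/block/zram0/disksize
 *
 * The value is rounded up to a multiple of PAGE_SIZE; writing to an
 * already-initialized device fails with -EBUSY until it is reset.
 */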
1792 | ||
522698d7 SS |
1793 | static ssize_t reset_store(struct device *dev, |
1794 | struct device_attribute *attr, const char *buf, size_t len) | |
4f2109f6 | 1795 | { |
522698d7 SS |
1796 | int ret; |
1797 | unsigned short do_reset; | |
1798 | struct zram *zram; | |
d666e20e | 1799 | struct gendisk *disk; |
4f2109f6 | 1800 | |
f405c445 SS |
1801 | ret = kstrtou16(buf, 10, &do_reset); |
1802 | if (ret) | |
1803 | return ret; | |
1804 | ||
1805 | if (!do_reset) | |
1806 | return -EINVAL; | |
1807 | ||
522698d7 | 1808 | zram = dev_to_zram(dev); |
d666e20e | 1809 | disk = zram->disk; |
4f2109f6 | 1810 | |
d666e20e | 1811 | mutex_lock(&disk->open_mutex); |
f405c445 | 1812 | /* Do not reset an active device or claimed device */ |
dbdc1be3 | 1813 | if (disk_openers(disk) || zram->claim) { |
d666e20e | 1814 | mutex_unlock(&disk->open_mutex); |
f405c445 | 1815 | return -EBUSY; |
522698d7 SS |
1816 | } |
1817 | ||
f405c445 SS |
1818 | /* From now on, nobody can open /dev/zram[0-9] */ | |
1819 | zram->claim = true; | |
d666e20e | 1820 | mutex_unlock(&disk->open_mutex); |
522698d7 | 1821 | |
f405c445 | 1822 | /* Make sure all pending I/O is finished */ |
d666e20e | 1823 | sync_blockdev(disk->part0); |
522698d7 | 1824 | zram_reset_device(zram); |
522698d7 | 1825 | |
d666e20e | 1826 | mutex_lock(&disk->open_mutex); |
f405c445 | 1827 | zram->claim = false; |
d666e20e | 1828 | mutex_unlock(&disk->open_mutex); |
f405c445 | 1829 | |
522698d7 | 1830 | return len; |
f405c445 SS |
1831 | } |
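/*
 * Editor's note (usage sketch, hedged): a reset is triggered by writing any
 * non-zero value, e.g.
 *
 *   echo 1 > /sys/block/zram0/reset
 *
 * It fails with -EBUSY while the device is still open (for instance while it
 * is an active swap device); the claim flag set above keeps new opens out for
 * the duration of the reset.
 */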
1832 | ||
1833 | static int zram_open(struct block_device *bdev, fmode_t mode) | |
1834 | { | |
1835 | int ret = 0; | |
1836 | struct zram *zram; | |
1837 | ||
a8698707 | 1838 | WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); |
f405c445 SS |
1839 | |
1840 | zram = bdev->bd_disk->private_data; | |
1841 | /* zram was claimed for reset, so the open request fails */ | |
1842 | if (zram->claim) | |
1843 | ret = -EBUSY; | |
4f2109f6 SS |
1844 | |
1845 | return ret; | |
1846 | } | |
1847 | ||
522698d7 | 1848 | static const struct block_device_operations zram_devops = { |
f405c445 | 1849 | .open = zram_open, |
c62b37d9 | 1850 | .submit_bio = zram_submit_bio, |
522698d7 SS |
1851 | .swap_slot_free_notify = zram_slot_free_notify, |
1852 | .rw_page = zram_rw_page, | |
1853 | .owner = THIS_MODULE | |
1854 | }; | |
1855 | ||
d422f401 | 1856 | #ifdef CONFIG_ZRAM_WRITEBACK |
a8b456d0 CH |
1857 | static const struct block_device_operations zram_wb_devops = { |
1858 | .open = zram_open, | |
1859 | .submit_bio = zram_submit_bio, | |
1860 | .swap_slot_free_notify = zram_slot_free_notify, | |
1861 | .owner = THIS_MODULE | |
1862 | }; | |
d422f401 | 1863 | #endif |
a8b456d0 | 1864 | |
522698d7 SS |
1865 | static DEVICE_ATTR_WO(compact); |
1866 | static DEVICE_ATTR_RW(disksize); | |
1867 | static DEVICE_ATTR_RO(initstate); | |
1868 | static DEVICE_ATTR_WO(reset); | |
c87d1655 SS |
1869 | static DEVICE_ATTR_WO(mem_limit); |
1870 | static DEVICE_ATTR_WO(mem_used_max); | |
e82592c4 | 1871 | static DEVICE_ATTR_WO(idle); |
522698d7 SS |
1872 | static DEVICE_ATTR_RW(max_comp_streams); |
1873 | static DEVICE_ATTR_RW(comp_algorithm); | |
013bf95a MK |
1874 | #ifdef CONFIG_ZRAM_WRITEBACK |
1875 | static DEVICE_ATTR_RW(backing_dev); | |
a939888e | 1876 | static DEVICE_ATTR_WO(writeback); |
bb416d18 | 1877 | static DEVICE_ATTR_RW(writeback_limit); |
1d69a3f8 | 1878 | static DEVICE_ATTR_RW(writeback_limit_enable); |
013bf95a | 1879 | #endif |
a68eb3b6 | 1880 | |
9b3bb7ab SS |
1881 | static struct attribute *zram_disk_attrs[] = { |
1882 | &dev_attr_disksize.attr, | |
1883 | &dev_attr_initstate.attr, | |
1884 | &dev_attr_reset.attr, | |
99ebbd30 | 1885 | &dev_attr_compact.attr, |
9ada9da9 | 1886 | &dev_attr_mem_limit.attr, |
461a8eee | 1887 | &dev_attr_mem_used_max.attr, |
e82592c4 | 1888 | &dev_attr_idle.attr, |
beca3ec7 | 1889 | &dev_attr_max_comp_streams.attr, |
e46b8a03 | 1890 | &dev_attr_comp_algorithm.attr, |
013bf95a MK |
1891 | #ifdef CONFIG_ZRAM_WRITEBACK |
1892 | &dev_attr_backing_dev.attr, | |
a939888e | 1893 | &dev_attr_writeback.attr, |
bb416d18 | 1894 | &dev_attr_writeback_limit.attr, |
1d69a3f8 | 1895 | &dev_attr_writeback_limit_enable.attr, |
013bf95a | 1896 | #endif |
2f6a3bed | 1897 | &dev_attr_io_stat.attr, |
4f2109f6 | 1898 | &dev_attr_mm_stat.attr, |
23eddf39 MK |
1899 | #ifdef CONFIG_ZRAM_WRITEBACK |
1900 | &dev_attr_bd_stat.attr, | |
1901 | #endif | |
623e47fc | 1902 | &dev_attr_debug_stat.attr, |
9b3bb7ab SS |
1903 | NULL, |
1904 | }; | |
1905 | ||
7f0d2672 | 1906 | ATTRIBUTE_GROUPS(zram_disk); |
98af4d4d | 1907 | |
92ff1528 SS |
1908 | /* |
1909 | * Allocate and initialize a new zram device. The function returns | |
1910 | * a '>= 0' device_id upon success, and a negative value otherwise. | |
1911 | */ | |
1912 | static int zram_add(void) | |
306b0c95 | 1913 | { |
85508ec6 | 1914 | struct zram *zram; |
92ff1528 | 1915 | int ret, device_id; |
85508ec6 SS |
1916 | |
1917 | zram = kzalloc(sizeof(struct zram), GFP_KERNEL); | |
1918 | if (!zram) | |
1919 | return -ENOMEM; | |
1920 | ||
92ff1528 | 1921 | ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); |
85508ec6 SS |
1922 | if (ret < 0) |
1923 | goto out_free_dev; | |
92ff1528 | 1924 | device_id = ret; |
de1a21a0 | 1925 | |
0900beae | 1926 | init_rwsem(&zram->init_lock); |
1d69a3f8 MK |
1927 | #ifdef CONFIG_ZRAM_WRITEBACK |
1928 | spin_lock_init(&zram->wb_limit_lock); | |
1929 | #endif | |
306b0c95 | 1930 | |
85508ec6 | 1931 | /* gendisk structure */ |
7681750b | 1932 | zram->disk = blk_alloc_disk(NUMA_NO_NODE); |
f1e3cfff | 1933 | if (!zram->disk) { |
70864969 | 1934 | pr_err("Error allocating disk structure for device %d\n", |
306b0c95 | 1935 | device_id); |
201c7b72 | 1936 | ret = -ENOMEM; |
7681750b | 1937 | goto out_free_idr; |
306b0c95 NG |
1938 | } |
1939 | ||
f1e3cfff NG |
1940 | zram->disk->major = zram_major; |
1941 | zram->disk->first_minor = device_id; | |
7681750b | 1942 | zram->disk->minors = 1; |
1ebe2e5f | 1943 | zram->disk->flags |= GENHD_FL_NO_PART; |
f1e3cfff | 1944 | zram->disk->fops = &zram_devops; |
f1e3cfff NG |
1945 | zram->disk->private_data = zram; |
1946 | snprintf(zram->disk->disk_name, 16, "zram%d", device_id); | |
306b0c95 | 1947 | |
33863c21 | 1948 | /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */ |
f1e3cfff | 1949 | set_capacity(zram->disk, 0); |
b67d1ec1 | 1950 | /* zram devices sort of resemble non-rotational disks */ |
8b904b5b BVA |
1951 | blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); |
1952 | blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); | |
e447a015 | 1953 | |
a1dd52af NG |
1954 | /* |
1955 | * To ensure that we always get PAGE_SIZE-aligned | |
1956 | * and n*PAGE_SIZE-sized I/O requests. | |
1957 | */ | |
f1e3cfff | 1958 | blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); |
7b19b8d4 RJ |
1959 | blk_queue_logical_block_size(zram->disk->queue, |
1960 | ZRAM_LOGICAL_BLOCK_SIZE); | |
f1e3cfff NG |
1961 | blk_queue_io_min(zram->disk->queue, PAGE_SIZE); |
1962 | blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); | |
f4659d8e | 1963 | zram->disk->queue->limits.discard_granularity = PAGE_SIZE; |
2bb4cd5c | 1964 | blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); |
31edeacd | 1965 | |
f4659d8e JK |
1966 | /* |
1967 | * zram_bio_discard() will clear all logical blocks if logical block | |
1968 | * size is identical to the physical block size (PAGE_SIZE). But if it is | |
1969 | * different, we will skip discarding the parts of logical blocks that | |
1970 | * fall in the portion of the request range which isn't aligned to the | |
1971 | * physical block size. So we can't ensure that all discarded logical | |
1972 | * blocks are zeroed. | |
1973 | */ | |
1974 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | |
31edeacd | 1975 | blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); |
5d83d5a0 | 1976 | |
37887783 | 1977 | blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); |
7f0d2672 | 1978 | ret = device_add_disk(NULL, zram->disk, zram_disk_groups); |
5e2e1cc4 LC |
1979 | if (ret) |
1980 | goto out_cleanup_disk; | |
98af4d4d | 1981 | |
e46b8a03 | 1982 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
d12b63c9 | 1983 | |
c0265342 | 1984 | zram_debugfs_register(zram); |
d12b63c9 | 1985 | pr_info("Added device: %s\n", zram->disk->disk_name); |
92ff1528 | 1986 | return device_id; |
de1a21a0 | 1987 | |
5e2e1cc4 | 1988 | out_cleanup_disk: |
8b9ab626 | 1989 | put_disk(zram->disk); |
85508ec6 SS |
1990 | out_free_idr: |
1991 | idr_remove(&zram_index_idr, device_id); | |
1992 | out_free_dev: | |
1993 | kfree(zram); | |
de1a21a0 | 1994 | return ret; |
306b0c95 NG |
1995 | } |
1996 | ||
6566d1a3 | 1997 | static int zram_remove(struct zram *zram) |
306b0c95 | 1998 | { |
8c54499a | 1999 | bool claimed; |
6566d1a3 | 2000 | |
7a86d6dc | 2001 | mutex_lock(&zram->disk->open_mutex); |
dbdc1be3 | 2002 | if (disk_openers(zram->disk)) { |
7a86d6dc | 2003 | mutex_unlock(&zram->disk->open_mutex); |
6566d1a3 SS |
2004 | return -EBUSY; |
2005 | } | |
2006 | ||
8c54499a ML |
2007 | claimed = zram->claim; |
2008 | if (!claimed) | |
2009 | zram->claim = true; | |
7a86d6dc | 2010 | mutex_unlock(&zram->disk->open_mutex); |
6566d1a3 | 2011 | |
c0265342 | 2012 | zram_debugfs_unregister(zram); |
306b0c95 | 2013 | |
8c54499a ML |
2014 | if (claimed) { |
2015 | /* | |
2016 | * If we were claimed by reset_store(), del_gendisk() will | |
2017 | * wait until reset_store() is done, so there is nothing to do. | |
2018 | */ | |
2019 | ; | |
2020 | } else { | |
2021 | /* Make sure all pending I/O is finished */ | |
7a86d6dc | 2022 | sync_blockdev(zram->disk->part0); |
8c54499a ML |
2023 | zram_reset_device(zram); |
2024 | } | |
6566d1a3 SS |
2025 | |
2026 | pr_info("Removed device: %s\n", zram->disk->disk_name); | |
2027 | ||
85508ec6 | 2028 | del_gendisk(zram->disk); |
8c54499a ML |
2029 | |
2030 | /* del_gendisk drains pending reset_store */ | |
2031 | WARN_ON_ONCE(claimed && zram->claim); | |
2032 | ||
5a4b6536 ML |
2033 | /* |
2034 | * disksize_store() may be called in between zram_reset_device() | |
2035 | * and del_gendisk(), so run the last reset to avoid leaking | |
2036 | * anything allocated with disksize_store() | |
2037 | */ | |
2038 | zram_reset_device(zram); | |
2039 | ||
8b9ab626 | 2040 | put_disk(zram->disk); |
85508ec6 | 2041 | kfree(zram); |
6566d1a3 SS |
2042 | return 0; |
2043 | } | |
2044 | ||
2045 | /* zram-control sysfs attributes */ | |
27104a53 GKH |
2046 | |
2047 | /* | |
2048 | * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the | |
2049 | * sense that reading from this file does alter the state of your system -- it | |
2050 | * creates a new un-initialized zram device and returns this device's | |
2051 | * device_id (or an error code if it fails to create a new device). | |
2052 | */ | |
6566d1a3 SS |
2053 | static ssize_t hot_add_show(struct class *class, |
2054 | struct class_attribute *attr, | |
2055 | char *buf) | |
2056 | { | |
2057 | int ret; | |
2058 | ||
2059 | mutex_lock(&zram_index_mutex); | |
2060 | ret = zram_add(); | |
2061 | mutex_unlock(&zram_index_mutex); | |
2062 | ||
2063 | if (ret < 0) | |
2064 | return ret; | |
2065 | return scnprintf(buf, PAGE_SIZE, "%d\n", ret); | |
2066 | } | |
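/*
 * Editor's note (usage sketch, hedged): reading hot_add allocates the next
 * free device and prints its id, while hot_remove takes an id to delete:
 *
 *   cat /sys/class/zram-control/hot_add        # e.g. prints "1" -> /dev/zram1
 *   echo 1 > /sys/class/zram-control/hot_remove
 *
 * Removal fails with -EBUSY while the target device is still open.
 */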
853eab68 WM |
2067 | static struct class_attribute class_attr_hot_add = |
2068 | __ATTR(hot_add, 0400, hot_add_show, NULL); | |
6566d1a3 SS |
2069 | |
2070 | static ssize_t hot_remove_store(struct class *class, | |
2071 | struct class_attribute *attr, | |
2072 | const char *buf, | |
2073 | size_t count) | |
2074 | { | |
2075 | struct zram *zram; | |
2076 | int ret, dev_id; | |
2077 | ||
2078 | /* dev_id is gendisk->first_minor, which is `int' */ | |
2079 | ret = kstrtoint(buf, 10, &dev_id); | |
2080 | if (ret) | |
2081 | return ret; | |
2082 | if (dev_id < 0) | |
2083 | return -EINVAL; | |
2084 | ||
2085 | mutex_lock(&zram_index_mutex); | |
2086 | ||
2087 | zram = idr_find(&zram_index_idr, dev_id); | |
17ec4cd9 | 2088 | if (zram) { |
6566d1a3 | 2089 | ret = zram_remove(zram); |
529e71e1 TI |
2090 | if (!ret) |
2091 | idr_remove(&zram_index_idr, dev_id); | |
17ec4cd9 | 2092 | } else { |
6566d1a3 | 2093 | ret = -ENODEV; |
17ec4cd9 | 2094 | } |
6566d1a3 SS |
2095 | |
2096 | mutex_unlock(&zram_index_mutex); | |
2097 | return ret ? ret : count; | |
85508ec6 | 2098 | } |
27104a53 | 2099 | static CLASS_ATTR_WO(hot_remove); |
a096cafc | 2100 | |
27104a53 GKH |
2101 | static struct attribute *zram_control_class_attrs[] = { |
2102 | &class_attr_hot_add.attr, | |
2103 | &class_attr_hot_remove.attr, | |
2104 | NULL, | |
6566d1a3 | 2105 | }; |
27104a53 | 2106 | ATTRIBUTE_GROUPS(zram_control_class); |
6566d1a3 SS |
2107 | |
2108 | static struct class zram_control_class = { | |
2109 | .name = "zram-control", | |
2110 | .owner = THIS_MODULE, | |
27104a53 | 2111 | .class_groups = zram_control_class_groups, |
6566d1a3 SS |
2112 | }; |
2113 | ||
85508ec6 SS |
2114 | static int zram_remove_cb(int id, void *ptr, void *data) |
2115 | { | |
8c54499a | 2116 | WARN_ON_ONCE(zram_remove(ptr)); |
85508ec6 SS |
2117 | return 0; |
2118 | } | |
a096cafc | 2119 | |
85508ec6 SS |
2120 | static void destroy_devices(void) |
2121 | { | |
6566d1a3 | 2122 | class_unregister(&zram_control_class); |
85508ec6 | 2123 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
c0265342 | 2124 | zram_debugfs_destroy(); |
85508ec6 | 2125 | idr_destroy(&zram_index_idr); |
a096cafc | 2126 | unregister_blkdev(zram_major, "zram"); |
1dd6c834 | 2127 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
306b0c95 NG |
2128 | } |
2129 | ||
f1e3cfff | 2130 | static int __init zram_init(void) |
306b0c95 | 2131 | { |
92ff1528 | 2132 | int ret; |
306b0c95 | 2133 | |
1dd6c834 AMG |
2134 | ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", |
2135 | zcomp_cpu_up_prepare, zcomp_cpu_dead); | |
2136 | if (ret < 0) | |
2137 | return ret; | |
2138 | ||
6566d1a3 SS |
2139 | ret = class_register(&zram_control_class); |
2140 | if (ret) { | |
70864969 | 2141 | pr_err("Unable to register zram-control class\n"); |
1dd6c834 | 2142 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
6566d1a3 SS |
2143 | return ret; |
2144 | } | |
2145 | ||
c0265342 | 2146 | zram_debugfs_create(); |
f1e3cfff NG |
2147 | zram_major = register_blkdev(0, "zram"); |
2148 | if (zram_major <= 0) { | |
70864969 | 2149 | pr_err("Unable to get major number\n"); |
6566d1a3 | 2150 | class_unregister(&zram_control_class); |
1dd6c834 | 2151 | cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); |
a096cafc | 2152 | return -EBUSY; |
306b0c95 NG |
2153 | } |
2154 | ||
92ff1528 | 2155 | while (num_devices != 0) { |
6566d1a3 | 2156 | mutex_lock(&zram_index_mutex); |
92ff1528 | 2157 | ret = zram_add(); |
6566d1a3 | 2158 | mutex_unlock(&zram_index_mutex); |
92ff1528 | 2159 | if (ret < 0) |
a096cafc | 2160 | goto out_error; |
92ff1528 | 2161 | num_devices--; |
de1a21a0 NG |
2162 | } |
2163 | ||
306b0c95 | 2164 | return 0; |
de1a21a0 | 2165 | |
a096cafc | 2166 | out_error: |
85508ec6 | 2167 | destroy_devices(); |
306b0c95 NG |
2168 | return ret; |
2169 | } | |
2170 | ||
f1e3cfff | 2171 | static void __exit zram_exit(void) |
306b0c95 | 2172 | { |
85508ec6 | 2173 | destroy_devices(); |
306b0c95 NG |
2174 | } |
2175 | ||
f1e3cfff NG |
2176 | module_init(zram_init); |
2177 | module_exit(zram_exit); | |
306b0c95 | 2178 | |
9b3bb7ab | 2179 | module_param(num_devices, uint, 0); |
c3cdb40e | 2180 | MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); |
9b3bb7ab | 2181 | |
306b0c95 NG |
2182 | MODULE_LICENSE("Dual BSD/GPL"); |
2183 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); | |
f1e3cfff | 2184 | MODULE_DESCRIPTION("Compressed RAM Block Device"); |