Commit | Line | Data |
---|---|---|
306b0c95 | 1 | /* |
f1e3cfff | 2 | * Compressed RAM block device |
306b0c95 | 3 | * |
1130ebba | 4 | * Copyright (C) 2008, 2009, 2010 Nitin Gupta |
7bfb3de8 | 5 | * 2012, 2013 Minchan Kim |
306b0c95 NG | 6 | * |
306b0c95 NG | 7 | * This code is released using a dual license strategy: BSD/GPL |
306b0c95 NG | 8 | * You can choose the licence that better fits your requirements. |
306b0c95 NG | 9 | * |
306b0c95 NG | 10 | * Released under the terms of 3-clause BSD License |
306b0c95 NG | 11 | * Released under the terms of GNU General Public License Version 2.0 |
306b0c95 NG | 12 | * |
306b0c95 NG | 13 | */ |
306b0c95 NG | 14 | |
f1e3cfff | 15 | #define KMSG_COMPONENT "zram" |
306b0c95 NG | 16 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
306b0c95 NG | 17 | |
306b0c95 NG | 18 | #include <linux/module.h> |
306b0c95 NG | 19 | #include <linux/kernel.h> |
8946a086 | 20 | #include <linux/bio.h> |
306b0c95 NG | 21 | #include <linux/bitops.h> |
306b0c95 NG | 22 | #include <linux/blkdev.h> |
306b0c95 NG | 23 | #include <linux/buffer_head.h> |
306b0c95 NG | 24 | #include <linux/device.h> |
306b0c95 NG | 25 | #include <linux/genhd.h> |
306b0c95 NG | 26 | #include <linux/highmem.h> |
5a0e3ad6 | 27 | #include <linux/slab.h> |
306b0c95 | 28 | #include <linux/string.h> |
306b0c95 | 29 | #include <linux/vmalloc.h> |
fcfa8d95 | 30 | #include <linux/err.h> |
85508ec6 | 31 | #include <linux/idr.h> |
6566d1a3 | 32 | #include <linux/sysfs.h> |
306b0c95 | 33 | |
16a4bfb9 | 34 | #include "zram_drv.h" |
306b0c95 | 35 | |
85508ec6 | 36 | static DEFINE_IDR(zram_index_idr); |
6566d1a3 SS | 37 | /* idr index must be protected */ |
6566d1a3 SS | 38 | static DEFINE_MUTEX(zram_index_mutex); |
6566d1a3 SS | 39 | |
f1e3cfff | 40 | static int zram_major; |
b7ca232e | 41 | static const char *default_compressor = "lzo"; |
306b0c95 | 42 | |
306b0c95 | 43 | /* Module params (documentation at end) */ |
ca3d70bd | 44 | static unsigned int num_devices = 1; |
33863c21 | 45 | |
8f7d282c SS | 46 | static inline void deprecated_attr_warn(const char *name) |
8f7d282c SS | 47 | { |
8f7d282c SS | 48 | pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n", |
8f7d282c SS | 49 | task_pid_nr(current), |
8f7d282c SS | 50 | current->comm, |
8f7d282c SS | 51 | name, |
8f7d282c SS | 52 | "See zram documentation."); |
8f7d282c SS | 53 | } |
8f7d282c SS | 54 | |
a68eb3b6 | 55 | #define ZRAM_ATTR_RO(name) \ |
3bca3ef7 | 56 | static ssize_t name##_show(struct device *d, \ |
a68eb3b6 SS | 57 | struct device_attribute *attr, char *b) \ |
a68eb3b6 SS | 58 | { \ |
a68eb3b6 SS | 59 | struct zram *zram = dev_to_zram(d); \ |
8f7d282c SS | 60 | \ |
8f7d282c SS | 61 | deprecated_attr_warn(__stringify(name)); \ |
56b4e8cb | 62 | return scnprintf(b, PAGE_SIZE, "%llu\n", \ |
a68eb3b6 SS | 63 | (u64)atomic64_read(&zram->stats.name)); \ |
a68eb3b6 SS | 64 | } \ |
083914ea | 65 | static DEVICE_ATTR_RO(name); |
a68eb3b6 | 66 | |
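As a reading aid, this is roughly what one invocation of the macro above produces; the attribute name num_reads is taken from the ZRAM_ATTR_RO() declarations further down, and the expansion is an illustrative sketch rather than actual preprocessor output.

```c
/* approximate expansion of ZRAM_ATTR_RO(num_reads) -- illustrative only */
static ssize_t num_reads_show(struct device *d,
		struct device_attribute *attr, char *b)
{
	struct zram *zram = dev_to_zram(d);

	deprecated_attr_warn("num_reads");	/* __stringify(name) */
	return scnprintf(b, PAGE_SIZE, "%llu\n",
			(u64)atomic64_read(&zram->stats.num_reads));
}
static DEVICE_ATTR_RO(num_reads);
```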
08eee69f | 67 | static inline bool init_done(struct zram *zram) |
be2d1d56 | 68 | { |
08eee69f | 69 | return zram->disksize; |
be2d1d56 SS |
70 | } |
71 | ||
9b3bb7ab SS |
72 | static inline struct zram *dev_to_zram(struct device *dev) |
73 | { | |
74 | return (struct zram *)dev_to_disk(dev)->private_data; | |
75 | } | |
76 | ||
b31177f2 | 77 | /* flag operations require the table entry's bit_spin_lock() to be held */ |
522698d7 SS |
78 | static int zram_test_flag(struct zram_meta *meta, u32 index, |
79 | enum zram_pageflags flag) | |
99ebbd30 | 80 | { |
522698d7 SS |
81 | return meta->table[index].value & BIT(flag); |
82 | } | |
99ebbd30 | 83 | |
522698d7 SS |
84 | static void zram_set_flag(struct zram_meta *meta, u32 index, |
85 | enum zram_pageflags flag) | |
86 | { | |
87 | meta->table[index].value |= BIT(flag); | |
88 | } | |
99ebbd30 | 89 | |
522698d7 SS |
90 | static void zram_clear_flag(struct zram_meta *meta, u32 index, |
91 | enum zram_pageflags flag) | |
92 | { | |
93 | meta->table[index].value &= ~BIT(flag); | |
94 | } | |
99ebbd30 | 95 | |
522698d7 SS |
96 | static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) |
97 | { | |
98 | return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); | |
99ebbd30 AM |
99 | } |
100 | ||
522698d7 SS |
101 | static void zram_set_obj_size(struct zram_meta *meta, |
102 | u32 index, size_t size) | |
9b3bb7ab | 103 | { |
522698d7 | 104 | unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; |
9b3bb7ab | 105 | |
522698d7 SS |
106 | meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; |
107 | } | |
108 | ||
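The helpers above pack the compressed object size into the low bits of table[index].value and the ZRAM_* flags above ZRAM_FLAG_SHIFT. A small stand-alone sketch of the same packing scheme; the shift value 24 below is only an assumption for illustration, the real constant lives in zram_drv.h.

```c
#include <assert.h>
#include <stdio.h>

#define FLAG_SHIFT 24	/* stand-in for ZRAM_FLAG_SHIFT from zram_drv.h */

int main(void)
{
	unsigned long value = 0;

	/* store a compressed object size of 1234 bytes, keeping the flags */
	unsigned long flags = value >> FLAG_SHIFT;
	value = (flags << FLAG_SHIFT) | 1234;

	/* set one flag bit above the size field */
	value |= 1UL << (FLAG_SHIFT + 1);

	assert((value & ((1UL << FLAG_SHIFT) - 1)) == 1234);	/* size survives */
	assert(value & (1UL << (FLAG_SHIFT + 1)));		/* flag survives */
	printf("value = %#lx\n", value);
	return 0;
}
```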
1c53e0d2 | 109 | static inline bool is_partial_io(struct bio_vec *bvec) |
522698d7 SS |
110 | { |
111 | return bvec->bv_len != PAGE_SIZE; | |
112 | } | |
113 | ||
114 | /* | |
115 | * Check if request is within bounds and aligned on zram logical blocks. | |
116 | */ | |
1c53e0d2 | 117 | static inline bool valid_io_request(struct zram *zram, |
522698d7 SS |
118 | sector_t start, unsigned int size) |
119 | { | |
120 | u64 end, bound; | |
121 | ||
122 | /* unaligned request */ | |
123 | if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) | |
1c53e0d2 | 124 | return false; |
522698d7 | 125 | if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) |
1c53e0d2 | 126 | return false; |
522698d7 SS |
127 | |
128 | end = start + (size >> SECTOR_SHIFT); | |
129 | bound = zram->disksize >> SECTOR_SHIFT; | |
130 | /* out of range */ |
131 | if (unlikely(start >= bound || end > bound || start > end)) | |
1c53e0d2 | 132 | return false; |
522698d7 SS |
133 | |
134 | /* I/O request is valid */ | |
1c53e0d2 | 135 | return true; |
522698d7 SS |
136 | } |
137 | ||
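For concreteness, the same alignment and bounds checks in a stand-alone form. The geometry used here (512-byte sectors, a 4096-byte logical block, i.e. 8 sectors per logical block) is only an assumption for the example; the real constants come from zram_drv.h.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT 9
#define LOGICAL_BLOCK_SIZE 4096				/* assumed ZRAM_LOGICAL_BLOCK_SIZE */
#define SECTORS_PER_LOGICAL_BLOCK (LOGICAL_BLOCK_SIZE >> SECTOR_SHIFT)

static bool valid_io(uint64_t disksize, uint64_t start, unsigned int size)
{
	if (start & (SECTORS_PER_LOGICAL_BLOCK - 1))	/* start sector not block aligned */
		return false;
	if (size & (LOGICAL_BLOCK_SIZE - 1))		/* length not a block multiple */
		return false;

	uint64_t end = start + (size >> SECTOR_SHIFT);
	uint64_t bound = disksize >> SECTOR_SHIFT;
	return start < bound && end <= bound && start <= end;
}

int main(void)
{
	/* 1 MiB device: sector 8 with 4096 bytes is valid, sector 9 is misaligned */
	printf("%d %d\n", valid_io(1 << 20, 8, 4096), valid_io(1 << 20, 9, 4096));
	return 0;
}
```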
138 | static void update_position(u32 *index, int *offset, struct bio_vec *bvec) | |
139 | { | |
140 | if (*offset + bvec->bv_len >= PAGE_SIZE) | |
141 | (*index)++; | |
142 | *offset = (*offset + bvec->bv_len) % PAGE_SIZE; | |
143 | } | |
144 | ||
145 | static inline void update_used_max(struct zram *zram, | |
146 | const unsigned long pages) | |
147 | { | |
148 | unsigned long old_max, cur_max; | |
149 | ||
150 | old_max = atomic_long_read(&zram->stats.max_used_pages); | |
151 | ||
152 | do { | |
153 | cur_max = old_max; | |
154 | if (pages > cur_max) | |
155 | old_max = atomic_long_cmpxchg( | |
156 | &zram->stats.max_used_pages, cur_max, pages); | |
157 | } while (old_max != cur_max); | |
158 | } | |
159 | ||
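update_used_max() is the classic lock-free "record the maximum" loop: re-read the current maximum and publish the new value only if nobody raced in between. A user-space sketch of the same pattern with C11 atomics; all names here are made up for illustration.

```c
#include <stdatomic.h>
#include <stdio.h>

static _Atomic long max_used;

static void record_max(long pages)
{
	long cur = atomic_load(&max_used);

	/* retry until we stored 'pages' or observed a value that is already larger */
	while (pages > cur &&
	       !atomic_compare_exchange_weak(&max_used, &cur, pages))
		;
}

int main(void)
{
	record_max(10);
	record_max(7);	/* a smaller value never overwrites the maximum */
	record_max(42);
	printf("%ld\n", atomic_load(&max_used));	/* prints 42 */
	return 0;
}
```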
1c53e0d2 | 160 | static bool page_zero_filled(void *ptr) |
522698d7 SS |
161 | { |
162 | unsigned int pos; | |
163 | unsigned long *page; | |
164 | ||
165 | page = (unsigned long *)ptr; | |
166 | ||
167 | for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { | |
168 | if (page[pos]) | |
1c53e0d2 | 169 | return false; |
522698d7 SS |
170 | } |
171 | ||
1c53e0d2 | 172 | return true; |
522698d7 SS |
173 | } |
174 | ||
175 | static void handle_zero_page(struct bio_vec *bvec) | |
176 | { | |
177 | struct page *page = bvec->bv_page; | |
178 | void *user_mem; | |
179 | ||
180 | user_mem = kmap_atomic(page); | |
181 | if (is_partial_io(bvec)) | |
182 | memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); | |
183 | else | |
184 | clear_page(user_mem); | |
185 | kunmap_atomic(user_mem); | |
186 | ||
187 | flush_dcache_page(page); | |
9b3bb7ab SS |
188 | } |
189 | ||
190 | static ssize_t initstate_show(struct device *dev, | |
191 | struct device_attribute *attr, char *buf) | |
192 | { | |
a68eb3b6 | 193 | u32 val; |
9b3bb7ab SS |
194 | struct zram *zram = dev_to_zram(dev); |
195 | ||
a68eb3b6 SS |
196 | down_read(&zram->init_lock); |
197 | val = init_done(zram); | |
198 | up_read(&zram->init_lock); | |
9b3bb7ab | 199 | |
56b4e8cb | 200 | return scnprintf(buf, PAGE_SIZE, "%u\n", val); |
9b3bb7ab SS |
201 | } |
202 | ||
522698d7 SS |
203 | static ssize_t disksize_show(struct device *dev, |
204 | struct device_attribute *attr, char *buf) | |
205 | { | |
206 | struct zram *zram = dev_to_zram(dev); | |
207 | ||
208 | return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); | |
209 | } | |
210 | ||
9b3bb7ab SS |
211 | static ssize_t orig_data_size_show(struct device *dev, |
212 | struct device_attribute *attr, char *buf) | |
213 | { | |
214 | struct zram *zram = dev_to_zram(dev); | |
215 | ||
8f7d282c | 216 | deprecated_attr_warn("orig_data_size"); |
56b4e8cb | 217 | return scnprintf(buf, PAGE_SIZE, "%llu\n", |
90a7806e | 218 | (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); |
9b3bb7ab SS |
219 | } |
220 | ||
9b3bb7ab SS |
221 | static ssize_t mem_used_total_show(struct device *dev, |
222 | struct device_attribute *attr, char *buf) | |
223 | { | |
224 | u64 val = 0; | |
225 | struct zram *zram = dev_to_zram(dev); | |
9b3bb7ab | 226 | |
8f7d282c | 227 | deprecated_attr_warn("mem_used_total"); |
9b3bb7ab | 228 | down_read(&zram->init_lock); |
5a99e95b WY |
229 | if (init_done(zram)) { |
230 | struct zram_meta *meta = zram->meta; | |
722cdc17 | 231 | val = zs_get_total_pages(meta->mem_pool); |
5a99e95b | 232 | } |
9b3bb7ab SS |
233 | up_read(&zram->init_lock); |
234 | ||
722cdc17 | 235 | return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); |
9b3bb7ab SS |
236 | } |
237 | ||
9ada9da9 MK |
238 | static ssize_t mem_limit_show(struct device *dev, |
239 | struct device_attribute *attr, char *buf) | |
240 | { | |
241 | u64 val; | |
242 | struct zram *zram = dev_to_zram(dev); | |
243 | ||
8f7d282c | 244 | deprecated_attr_warn("mem_limit"); |
9ada9da9 MK |
245 | down_read(&zram->init_lock); |
246 | val = zram->limit_pages; | |
247 | up_read(&zram->init_lock); | |
248 | ||
249 | return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); | |
250 | } | |
251 | ||
252 | static ssize_t mem_limit_store(struct device *dev, | |
253 | struct device_attribute *attr, const char *buf, size_t len) | |
254 | { | |
255 | u64 limit; | |
256 | char *tmp; | |
257 | struct zram *zram = dev_to_zram(dev); | |
258 | ||
259 | limit = memparse(buf, &tmp); | |
260 | if (buf == tmp) /* no chars parsed, invalid input */ | |
261 | return -EINVAL; | |
262 | ||
263 | down_write(&zram->init_lock); | |
264 | zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; | |
265 | up_write(&zram->init_lock); | |
266 | ||
267 | return len; | |
268 | } | |
269 | ||
461a8eee MK |
270 | static ssize_t mem_used_max_show(struct device *dev, |
271 | struct device_attribute *attr, char *buf) | |
272 | { | |
273 | u64 val = 0; | |
274 | struct zram *zram = dev_to_zram(dev); | |
275 | ||
8f7d282c | 276 | deprecated_attr_warn("mem_used_max"); |
461a8eee MK |
277 | down_read(&zram->init_lock); |
278 | if (init_done(zram)) | |
279 | val = atomic_long_read(&zram->stats.max_used_pages); | |
280 | up_read(&zram->init_lock); | |
281 | ||
282 | return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); | |
283 | } | |
284 | ||
285 | static ssize_t mem_used_max_store(struct device *dev, | |
286 | struct device_attribute *attr, const char *buf, size_t len) | |
287 | { | |
288 | int err; | |
289 | unsigned long val; | |
290 | struct zram *zram = dev_to_zram(dev); | |
461a8eee MK |
291 | |
292 | err = kstrtoul(buf, 10, &val); | |
293 | if (err || val != 0) | |
294 | return -EINVAL; | |
295 | ||
296 | down_read(&zram->init_lock); | |
5a99e95b WY |
297 | if (init_done(zram)) { |
298 | struct zram_meta *meta = zram->meta; | |
461a8eee MK |
299 | atomic_long_set(&zram->stats.max_used_pages, |
300 | zs_get_total_pages(meta->mem_pool)); | |
5a99e95b | 301 | } |
461a8eee MK |
302 | up_read(&zram->init_lock); |
303 | ||
304 | return len; | |
305 | } | |
306 | ||
522698d7 SS |
307 | static ssize_t max_comp_streams_show(struct device *dev, |
308 | struct device_attribute *attr, char *buf) | |
309 | { | |
310 | int val; | |
311 | struct zram *zram = dev_to_zram(dev); | |
312 | ||
313 | down_read(&zram->init_lock); | |
314 | val = zram->max_comp_streams; | |
315 | up_read(&zram->init_lock); | |
316 | ||
317 | return scnprintf(buf, PAGE_SIZE, "%d\n", val); | |
318 | } | |
319 | ||
beca3ec7 SS |
320 | static ssize_t max_comp_streams_store(struct device *dev, |
321 | struct device_attribute *attr, const char *buf, size_t len) | |
322 | { | |
323 | int num; | |
324 | struct zram *zram = dev_to_zram(dev); | |
60a726e3 | 325 | int ret; |
beca3ec7 | 326 | |
60a726e3 MK |
327 | ret = kstrtoint(buf, 0, &num); |
328 | if (ret < 0) | |
329 | return ret; | |
beca3ec7 SS |
330 | if (num < 1) |
331 | return -EINVAL; | |
60a726e3 | 332 | |
beca3ec7 SS |
333 | down_write(&zram->init_lock); |
334 | if (init_done(zram)) { | |
60a726e3 | 335 | if (!zcomp_set_max_streams(zram->comp, num)) { |
fe8eb122 | 336 | pr_info("Cannot change max compression streams\n"); |
60a726e3 MK |
337 | ret = -EINVAL; |
338 | goto out; | |
339 | } | |
beca3ec7 | 340 | } |
60a726e3 | 341 | |
beca3ec7 | 342 | zram->max_comp_streams = num; |
60a726e3 MK |
343 | ret = len; |
344 | out: | |
beca3ec7 | 345 | up_write(&zram->init_lock); |
60a726e3 | 346 | return ret; |
beca3ec7 SS |
347 | } |
348 | ||
e46b8a03 SS |
349 | static ssize_t comp_algorithm_show(struct device *dev, |
350 | struct device_attribute *attr, char *buf) | |
351 | { | |
352 | size_t sz; | |
353 | struct zram *zram = dev_to_zram(dev); | |
354 | ||
355 | down_read(&zram->init_lock); | |
356 | sz = zcomp_available_show(zram->compressor, buf); | |
357 | up_read(&zram->init_lock); | |
358 | ||
359 | return sz; | |
360 | } | |
361 | ||
362 | static ssize_t comp_algorithm_store(struct device *dev, | |
363 | struct device_attribute *attr, const char *buf, size_t len) | |
364 | { | |
365 | struct zram *zram = dev_to_zram(dev); | |
4bbacd51 SS |
366 | size_t sz; |
367 | ||
1d5b43bf LH |
368 | if (!zcomp_available_algorithm(buf)) |
369 | return -EINVAL; | |
370 | ||
e46b8a03 SS |
371 | down_write(&zram->init_lock); |
372 | if (init_done(zram)) { | |
373 | up_write(&zram->init_lock); | |
374 | pr_info("Can't change algorithm for initialized device\n"); | |
375 | return -EBUSY; | |
376 | } | |
377 | strlcpy(zram->compressor, buf, sizeof(zram->compressor)); | |
4bbacd51 SS |
378 | |
379 | /* ignore trailing newline */ | |
380 | sz = strlen(zram->compressor); | |
381 | if (sz > 0 && zram->compressor[sz - 1] == '\n') | |
382 | zram->compressor[sz - 1] = 0x00; | |
383 | ||
e46b8a03 SS |
384 | up_write(&zram->init_lock); |
385 | return len; | |
386 | } | |
387 | ||
522698d7 SS |
388 | static ssize_t compact_store(struct device *dev, |
389 | struct device_attribute *attr, const char *buf, size_t len) | |
306b0c95 | 390 | { |
522698d7 SS |
391 | struct zram *zram = dev_to_zram(dev); |
392 | struct zram_meta *meta; | |
306b0c95 | 393 | |
522698d7 SS |
394 | down_read(&zram->init_lock); |
395 | if (!init_done(zram)) { | |
396 | up_read(&zram->init_lock); | |
397 | return -EINVAL; | |
398 | } | |
306b0c95 | 399 | |
522698d7 | 400 | meta = zram->meta; |
7d3f3938 | 401 | zs_compact(meta->mem_pool); |
522698d7 | 402 | up_read(&zram->init_lock); |
d2d5e762 | 403 | |
522698d7 | 404 | return len; |
d2d5e762 WY |
405 | } |
406 | ||
522698d7 SS |
407 | static ssize_t io_stat_show(struct device *dev, |
408 | struct device_attribute *attr, char *buf) | |
d2d5e762 | 409 | { |
522698d7 SS |
410 | struct zram *zram = dev_to_zram(dev); |
411 | ssize_t ret; | |
d2d5e762 | 412 | |
522698d7 SS |
413 | down_read(&zram->init_lock); |
414 | ret = scnprintf(buf, PAGE_SIZE, | |
415 | "%8llu %8llu %8llu %8llu\n", | |
416 | (u64)atomic64_read(&zram->stats.failed_reads), | |
417 | (u64)atomic64_read(&zram->stats.failed_writes), | |
418 | (u64)atomic64_read(&zram->stats.invalid_io), | |
419 | (u64)atomic64_read(&zram->stats.notify_free)); | |
420 | up_read(&zram->init_lock); | |
306b0c95 | 421 | |
522698d7 | 422 | return ret; |
9b3bb7ab SS |
423 | } |
424 | ||
522698d7 SS |
425 | static ssize_t mm_stat_show(struct device *dev, |
426 | struct device_attribute *attr, char *buf) | |
9b3bb7ab | 427 | { |
522698d7 | 428 | struct zram *zram = dev_to_zram(dev); |
7d3f3938 | 429 | struct zs_pool_stats pool_stats; |
522698d7 SS |
430 | u64 orig_size, mem_used = 0; |
431 | long max_used; | |
432 | ssize_t ret; | |
a539c72a | 433 | |
7d3f3938 SS |
434 | memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); |
435 | ||
522698d7 | 436 | down_read(&zram->init_lock); |
7d3f3938 | 437 | if (init_done(zram)) { |
522698d7 | 438 | mem_used = zs_get_total_pages(zram->meta->mem_pool); |
7d3f3938 SS |
439 | zs_pool_stats(zram->meta->mem_pool, &pool_stats); |
440 | } | |
9b3bb7ab | 441 | |
522698d7 SS |
442 | orig_size = atomic64_read(&zram->stats.pages_stored); |
443 | max_used = atomic_long_read(&zram->stats.max_used_pages); | |
9b3bb7ab | 444 | |
522698d7 | 445 | ret = scnprintf(buf, PAGE_SIZE, |
7d3f3938 | 446 | "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", |
522698d7 SS |
447 | orig_size << PAGE_SHIFT, |
448 | (u64)atomic64_read(&zram->stats.compr_data_size), | |
449 | mem_used << PAGE_SHIFT, | |
450 | zram->limit_pages << PAGE_SHIFT, | |
451 | max_used << PAGE_SHIFT, | |
452 | (u64)atomic64_read(&zram->stats.zero_pages), | |
860c707d | 453 | pool_stats.pages_compacted); |
522698d7 | 454 | up_read(&zram->init_lock); |
9b3bb7ab | 455 | |
522698d7 SS |
456 | return ret; |
457 | } | |
458 | ||
459 | static DEVICE_ATTR_RO(io_stat); | |
460 | static DEVICE_ATTR_RO(mm_stat); | |
461 | ZRAM_ATTR_RO(num_reads); | |
462 | ZRAM_ATTR_RO(num_writes); | |
463 | ZRAM_ATTR_RO(failed_reads); | |
464 | ZRAM_ATTR_RO(failed_writes); | |
465 | ZRAM_ATTR_RO(invalid_io); | |
466 | ZRAM_ATTR_RO(notify_free); | |
467 | ZRAM_ATTR_RO(zero_pages); | |
468 | ZRAM_ATTR_RO(compr_data_size); | |
469 | ||
470 | static inline bool zram_meta_get(struct zram *zram) | |
471 | { | |
472 | if (atomic_inc_not_zero(&zram->refcount)) | |
473 | return true; | |
474 | return false; | |
475 | } | |
476 | ||
477 | static inline void zram_meta_put(struct zram *zram) | |
478 | { | |
479 | atomic_dec(&zram->refcount); | |
480 | } | |
481 | ||
482 | static void zram_meta_free(struct zram_meta *meta, u64 disksize) | |
483 | { | |
484 | size_t num_pages = disksize >> PAGE_SHIFT; | |
485 | size_t index; | |
1fec1172 GM |
486 | |
487 | /* Free all pages that are still in this zram device */ | |
488 | for (index = 0; index < num_pages; index++) { | |
489 | unsigned long handle = meta->table[index].handle; | |
490 | ||
491 | if (!handle) | |
492 | continue; | |
493 | ||
494 | zs_free(meta->mem_pool, handle); | |
495 | } | |
496 | ||
9b3bb7ab | 497 | zs_destroy_pool(meta->mem_pool); |
9b3bb7ab SS |
498 | vfree(meta->table); |
499 | kfree(meta); | |
500 | } | |
501 | ||
4ce321f5 | 502 | static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) |
9b3bb7ab SS |
503 | { |
504 | size_t num_pages; | |
505 | struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); | |
b8179958 | 506 | |
9b3bb7ab | 507 | if (!meta) |
b8179958 | 508 | return NULL; |
9b3bb7ab | 509 | |
9b3bb7ab SS |
510 | num_pages = disksize >> PAGE_SHIFT; |
511 | meta->table = vzalloc(num_pages * sizeof(*meta->table)); | |
512 | if (!meta->table) { | |
513 | pr_err("Error allocating zram address table\n"); | |
b8179958 | 514 | goto out_error; |
9b3bb7ab SS |
515 | } |
516 | ||
3eba0c6a | 517 | meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM); |
9b3bb7ab SS |
518 | if (!meta->mem_pool) { |
519 | pr_err("Error creating memory pool\n"); | |
b8179958 | 520 | goto out_error; |
9b3bb7ab SS |
521 | } |
522 | ||
523 | return meta; | |
524 | ||
b8179958 | 525 | out_error: |
9b3bb7ab | 526 | vfree(meta->table); |
9b3bb7ab | 527 | kfree(meta); |
b8179958 | 528 | return NULL; |
9b3bb7ab SS |
529 | } |
530 | ||
d2d5e762 WY |
531 | /* |
532 | * To protect concurrent access to the same index entry, | |
533 | * caller should hold this table index entry's bit_spinlock to | |
534 | * indicate that this index entry is being accessed. |
535 | */ | |
f1e3cfff | 536 | static void zram_free_page(struct zram *zram, size_t index) |
306b0c95 | 537 | { |
8b3cc3ed MK |
538 | struct zram_meta *meta = zram->meta; |
539 | unsigned long handle = meta->table[index].handle; | |
306b0c95 | 540 | |
fd1a30de | 541 | if (unlikely(!handle)) { |
2e882281 NG |
542 | /* |
543 | * No memory is allocated for zero filled pages. | |
544 | * Simply clear zero page flag. | |
545 | */ | |
8b3cc3ed MK |
546 | if (zram_test_flag(meta, index, ZRAM_ZERO)) { |
547 | zram_clear_flag(meta, index, ZRAM_ZERO); | |
90a7806e | 548 | atomic64_dec(&zram->stats.zero_pages); |
306b0c95 NG |
549 | } |
550 | return; | |
551 | } | |
552 | ||
8b3cc3ed | 553 | zs_free(meta->mem_pool, handle); |
306b0c95 | 554 | |
d2d5e762 WY |
555 | atomic64_sub(zram_get_obj_size(meta, index), |
556 | &zram->stats.compr_data_size); | |
90a7806e | 557 | atomic64_dec(&zram->stats.pages_stored); |
306b0c95 | 558 | |
8b3cc3ed | 559 | meta->table[index].handle = 0; |
d2d5e762 | 560 | zram_set_obj_size(meta, index, 0); |
306b0c95 NG |
561 | } |
562 | ||
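The comment above spells out the locking contract for zram_free_page(). Every caller in this file follows the same three-line sequence, shown here in isolation; it appears verbatim in zram_bio_discard() and zram_slot_free_notify() further down.

```c
/* typical caller of zram_free_page(); the per-entry bit spinlock
 * serialises concurrent access to this table slot */
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
```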
37b51fdd | 563 | static int zram_decompress_page(struct zram *zram, char *mem, u32 index) |
306b0c95 | 564 | { |
b7ca232e | 565 | int ret = 0; |
37b51fdd | 566 | unsigned char *cmem; |
8b3cc3ed | 567 | struct zram_meta *meta = zram->meta; |
92967471 | 568 | unsigned long handle; |
023b409f | 569 | size_t size; |
92967471 | 570 | |
d2d5e762 | 571 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
92967471 | 572 | handle = meta->table[index].handle; |
d2d5e762 | 573 | size = zram_get_obj_size(meta, index); |
306b0c95 | 574 | |
8b3cc3ed | 575 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { |
d2d5e762 | 576 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
42e99bd9 | 577 | clear_page(mem); |
8c921b2b JM |
578 | return 0; |
579 | } | |
306b0c95 | 580 | |
8b3cc3ed | 581 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); |
92967471 | 582 | if (size == PAGE_SIZE) |
42e99bd9 | 583 | copy_page(mem, cmem); |
37b51fdd | 584 | else |
b7ca232e | 585 | ret = zcomp_decompress(zram->comp, cmem, size, mem); |
8b3cc3ed | 586 | zs_unmap_object(meta->mem_pool, handle); |
d2d5e762 | 587 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
a1dd52af | 588 | |
8c921b2b | 589 | /* Should NEVER happen. Return bio error if it does. */ |
b7ca232e | 590 | if (unlikely(ret)) { |
8c921b2b | 591 | pr_err("Decompression failed! err=%d, page=%u\n", ret, index); |
8c921b2b | 592 | return ret; |
a1dd52af | 593 | } |
306b0c95 | 594 | |
8c921b2b | 595 | return 0; |
306b0c95 NG |
596 | } |
597 | ||
37b51fdd | 598 | static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
b627cff3 | 599 | u32 index, int offset) |
924bd88d JM |
600 | { |
601 | int ret; | |
37b51fdd SS |
602 | struct page *page; |
603 | unsigned char *user_mem, *uncmem = NULL; | |
8b3cc3ed | 604 | struct zram_meta *meta = zram->meta; |
37b51fdd SS |
605 | page = bvec->bv_page; |
606 | ||
d2d5e762 | 607 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
8b3cc3ed MK |
608 | if (unlikely(!meta->table[index].handle) || |
609 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
d2d5e762 | 610 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
37b51fdd | 611 | handle_zero_page(bvec); |
924bd88d JM |
612 | return 0; |
613 | } | |
d2d5e762 | 614 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
924bd88d | 615 | |
37b51fdd SS |
616 | if (is_partial_io(bvec)) |
617 | /* Use a temporary buffer to decompress the page */ | |
7e5a5104 MK |
618 | uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); |
619 | ||
620 | user_mem = kmap_atomic(page); | |
621 | if (!is_partial_io(bvec)) | |
37b51fdd SS |
622 | uncmem = user_mem; |
623 | ||
624 | if (!uncmem) { | |
70864969 | 625 | pr_err("Unable to allocate temp memory\n"); |
37b51fdd SS |
626 | ret = -ENOMEM; |
627 | goto out_cleanup; | |
628 | } | |
924bd88d | 629 | |
37b51fdd | 630 | ret = zram_decompress_page(zram, uncmem, index); |
924bd88d | 631 | /* Should NEVER happen. Return bio error if it does. */ |
b7ca232e | 632 | if (unlikely(ret)) |
37b51fdd | 633 | goto out_cleanup; |
924bd88d | 634 | |
37b51fdd SS |
635 | if (is_partial_io(bvec)) |
636 | memcpy(user_mem + bvec->bv_offset, uncmem + offset, | |
637 | bvec->bv_len); | |
638 | ||
639 | flush_dcache_page(page); | |
640 | ret = 0; | |
641 | out_cleanup: | |
642 | kunmap_atomic(user_mem); | |
643 | if (is_partial_io(bvec)) | |
644 | kfree(uncmem); | |
645 | return ret; | |
924bd88d JM |
646 | } |
647 | ||
648 | static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
649 | int offset) | |
306b0c95 | 650 | { |
397c6066 | 651 | int ret = 0; |
8c921b2b | 652 | size_t clen; |
c2344348 | 653 | unsigned long handle; |
130f315a | 654 | struct page *page; |
924bd88d | 655 | unsigned char *user_mem, *cmem, *src, *uncmem = NULL; |
8b3cc3ed | 656 | struct zram_meta *meta = zram->meta; |
17162f41 | 657 | struct zcomp_strm *zstrm = NULL; |
461a8eee | 658 | unsigned long alloced_pages; |
306b0c95 | 659 | |
8c921b2b | 660 | page = bvec->bv_page; |
924bd88d JM |
661 | if (is_partial_io(bvec)) { |
662 | /* | |
663 | * This is a partial IO. We need to read the full page | |
664 | * before writing the changes. |
665 | */ | |
7e5a5104 | 666 | uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); |
924bd88d | 667 | if (!uncmem) { |
924bd88d JM |
668 | ret = -ENOMEM; |
669 | goto out; | |
670 | } | |
37b51fdd | 671 | ret = zram_decompress_page(zram, uncmem, index); |
397c6066 | 672 | if (ret) |
924bd88d | 673 | goto out; |
924bd88d JM |
674 | } |
675 | ||
b7ca232e | 676 | zstrm = zcomp_strm_find(zram->comp); |
ba82fe2e | 677 | user_mem = kmap_atomic(page); |
924bd88d | 678 | |
397c6066 | 679 | if (is_partial_io(bvec)) { |
924bd88d JM |
680 | memcpy(uncmem + offset, user_mem + bvec->bv_offset, |
681 | bvec->bv_len); | |
397c6066 NG |
682 | kunmap_atomic(user_mem); |
683 | user_mem = NULL; | |
684 | } else { | |
924bd88d | 685 | uncmem = user_mem; |
397c6066 | 686 | } |
924bd88d JM |
687 | |
688 | if (page_zero_filled(uncmem)) { | |
c4065152 WY |
689 | if (user_mem) |
690 | kunmap_atomic(user_mem); | |
f40ac2ae | 691 | /* Free memory associated with this sector now. */ |
d2d5e762 | 692 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f40ac2ae | 693 | zram_free_page(zram, index); |
92967471 | 694 | zram_set_flag(meta, index, ZRAM_ZERO); |
d2d5e762 | 695 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
f40ac2ae | 696 | |
90a7806e | 697 | atomic64_inc(&zram->stats.zero_pages); |
924bd88d JM |
698 | ret = 0; |
699 | goto out; | |
8c921b2b | 700 | } |
306b0c95 | 701 | |
b7ca232e | 702 | ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); |
397c6066 NG |
703 | if (!is_partial_io(bvec)) { |
704 | kunmap_atomic(user_mem); | |
705 | user_mem = NULL; | |
706 | uncmem = NULL; | |
707 | } | |
306b0c95 | 708 | |
b7ca232e | 709 | if (unlikely(ret)) { |
8c921b2b | 710 | pr_err("Compression failed! err=%d\n", ret); |
924bd88d | 711 | goto out; |
8c921b2b | 712 | } |
b7ca232e | 713 | src = zstrm->buffer; |
c8f2f0db | 714 | if (unlikely(clen > max_zpage_size)) { |
c8f2f0db | 715 | clen = PAGE_SIZE; |
397c6066 NG |
716 | if (is_partial_io(bvec)) |
717 | src = uncmem; | |
c8f2f0db | 718 | } |
a1dd52af | 719 | |
8b3cc3ed | 720 | handle = zs_malloc(meta->mem_pool, clen); |
fd1a30de | 721 | if (!handle) { |
70864969 | 722 | pr_err("Error allocating memory for compressed page: %u, size=%zu\n", |
596b3dd4 | 723 | index, clen); |
924bd88d JM |
724 | ret = -ENOMEM; |
725 | goto out; | |
8c921b2b | 726 | } |
9ada9da9 | 727 | |
461a8eee | 728 | alloced_pages = zs_get_total_pages(meta->mem_pool); |
12372755 SS |
729 | update_used_max(zram, alloced_pages); |
730 | ||
461a8eee | 731 | if (zram->limit_pages && alloced_pages > zram->limit_pages) { |
9ada9da9 MK |
732 | zs_free(meta->mem_pool, handle); |
733 | ret = -ENOMEM; | |
734 | goto out; | |
735 | } | |
736 | ||
8b3cc3ed | 737 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); |
306b0c95 | 738 | |
42e99bd9 | 739 | if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { |
397c6066 | 740 | src = kmap_atomic(page); |
42e99bd9 | 741 | copy_page(cmem, src); |
397c6066 | 742 | kunmap_atomic(src); |
42e99bd9 JL |
743 | } else { |
744 | memcpy(cmem, src, clen); | |
745 | } | |
306b0c95 | 746 | |
b7ca232e | 747 | zcomp_strm_release(zram->comp, zstrm); |
17162f41 | 748 | zstrm = NULL; |
8b3cc3ed | 749 | zs_unmap_object(meta->mem_pool, handle); |
fd1a30de | 750 | |
f40ac2ae SS |
751 | /* |
752 | * Free memory associated with this sector | |
753 | * before overwriting unused sectors. | |
754 | */ | |
d2d5e762 | 755 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f40ac2ae SS |
756 | zram_free_page(zram, index); |
757 | ||
8b3cc3ed | 758 | meta->table[index].handle = handle; |
d2d5e762 WY |
759 | zram_set_obj_size(meta, index, clen); |
760 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
306b0c95 | 761 | |
8c921b2b | 762 | /* Update stats */ |
90a7806e SS |
763 | atomic64_add(clen, &zram->stats.compr_data_size); |
764 | atomic64_inc(&zram->stats.pages_stored); | |
924bd88d | 765 | out: |
17162f41 | 766 | if (zstrm) |
b7ca232e | 767 | zcomp_strm_release(zram->comp, zstrm); |
397c6066 NG |
768 | if (is_partial_io(bvec)) |
769 | kfree(uncmem); | |
924bd88d | 770 | return ret; |
8c921b2b JM |
771 | } |
772 | ||
f4659d8e JK |
773 | /* |
774 | * zram_bio_discard - handler on discard request | |
775 | * @index: physical block index in PAGE_SIZE units | |
776 | * @offset: byte offset within physical block | |
777 | */ | |
778 | static void zram_bio_discard(struct zram *zram, u32 index, | |
779 | int offset, struct bio *bio) | |
780 | { | |
781 | size_t n = bio->bi_iter.bi_size; | |
d2d5e762 | 782 | struct zram_meta *meta = zram->meta; |
f4659d8e JK |
783 | |
784 | /* | |
785 | * zram manages data in physical block size units. Because logical block | |
786 | * size isn't identical to the physical block size on some architectures, we |
787 | * could get a discard request pointing to a specific offset within a | |
788 | * certain physical block. Although we can handle this request by | |
789 | * reading that physical block and decompressing and partially zeroing |
790 | * and re-compressing and then re-storing it, this isn't reasonable | |
791 | * because our intent with a discard request is to save memory. So | |
792 | * skipping this logical block is appropriate here. | |
793 | */ | |
794 | if (offset) { | |
38515c73 | 795 | if (n <= (PAGE_SIZE - offset)) |
f4659d8e JK |
796 | return; |
797 | ||
38515c73 | 798 | n -= (PAGE_SIZE - offset); |
f4659d8e JK |
799 | index++; |
800 | } | |
801 | ||
802 | while (n >= PAGE_SIZE) { | |
d2d5e762 | 803 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f4659d8e | 804 | zram_free_page(zram, index); |
d2d5e762 | 805 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
015254da | 806 | atomic64_inc(&zram->stats.notify_free); |
f4659d8e JK |
807 | index++; |
808 | n -= PAGE_SIZE; | |
809 | } | |
810 | } | |
811 | ||
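A small numeric walk-through of the skipping logic above, with PAGE_SIZE assumed to be 4096 for the example: a discard of 8192 bytes starting 512 bytes into a page first drops the 3584 partially-covered bytes, then frees exactly one full page, and skips the 512-byte tail.

```c
#include <stdio.h>

#define PG 4096		/* assumed PAGE_SIZE */

int main(void)
{
	unsigned long n = 8192;	/* discard length in bytes */
	int offset = 512;	/* byte offset into the first page */
	unsigned index = 10;	/* first page touched by the discard */

	if (offset) {
		n -= PG - offset;	/* 3584 partially-covered bytes are skipped */
		index++;
	}
	while (n >= PG) {		/* only fully-covered pages are freed */
		printf("free page %u\n", index);	/* prints: free page 11 */
		index++;
		n -= PG;
	}
	printf("%lu trailing bytes skipped\n", n);	/* prints 512 */
	return 0;
}
```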
522698d7 SS |
812 | static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, |
813 | int offset, int rw) | |
9b3bb7ab | 814 | { |
522698d7 | 815 | unsigned long start_time = jiffies; |
9b3bb7ab | 816 | int ret; |
9b3bb7ab | 817 | |
522698d7 SS |
818 | generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, |
819 | &zram->disk->part0); | |
46a51c80 | 820 | |
522698d7 SS |
821 | if (rw == READ) { |
822 | atomic64_inc(&zram->stats.num_reads); | |
823 | ret = zram_bvec_read(zram, bvec, index, offset); | |
824 | } else { | |
825 | atomic64_inc(&zram->stats.num_writes); | |
826 | ret = zram_bvec_write(zram, bvec, index, offset); | |
1b672224 | 827 | } |
9b3bb7ab | 828 | |
522698d7 | 829 | generic_end_io_acct(rw, &zram->disk->part0, start_time); |
9b3bb7ab | 830 | |
522698d7 SS |
831 | if (unlikely(ret)) { |
832 | if (rw == READ) | |
833 | atomic64_inc(&zram->stats.failed_reads); | |
834 | else | |
835 | atomic64_inc(&zram->stats.failed_writes); | |
1b672224 | 836 | } |
9b3bb7ab | 837 | |
1b672224 | 838 | return ret; |
8c921b2b JM |
839 | } |
840 | ||
be257c61 | 841 | static void __zram_make_request(struct zram *zram, struct bio *bio) |
8c921b2b | 842 | { |
b627cff3 | 843 | int offset, rw; |
8c921b2b | 844 | u32 index; |
7988613b KO |
845 | struct bio_vec bvec; |
846 | struct bvec_iter iter; | |
8c921b2b | 847 | |
4f024f37 KO |
848 | index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; |
849 | offset = (bio->bi_iter.bi_sector & | |
850 | (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; | |
8c921b2b | 851 | |
f4659d8e JK |
852 | if (unlikely(bio->bi_rw & REQ_DISCARD)) { |
853 | zram_bio_discard(zram, index, offset, bio); | |
4246a0b6 | 854 | bio_endio(bio); |
f4659d8e JK |
855 | return; |
856 | } | |
857 | ||
b627cff3 | 858 | rw = bio_data_dir(bio); |
7988613b | 859 | bio_for_each_segment(bvec, bio, iter) { |
924bd88d JM |
860 | int max_transfer_size = PAGE_SIZE - offset; |
861 | ||
7988613b | 862 | if (bvec.bv_len > max_transfer_size) { |
924bd88d JM |
863 | /* |
864 | * zram_bvec_rw() can only make operation on a single | |
865 | * zram page. Split the bio vector. | |
866 | */ | |
867 | struct bio_vec bv; | |
868 | ||
7988613b | 869 | bv.bv_page = bvec.bv_page; |
924bd88d | 870 | bv.bv_len = max_transfer_size; |
7988613b | 871 | bv.bv_offset = bvec.bv_offset; |
924bd88d | 872 | |
b627cff3 | 873 | if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0) |
924bd88d JM |
874 | goto out; |
875 | ||
7988613b | 876 | bv.bv_len = bvec.bv_len - max_transfer_size; |
924bd88d | 877 | bv.bv_offset += max_transfer_size; |
b627cff3 | 878 | if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0) |
924bd88d JM |
879 | goto out; |
880 | } else | |
b627cff3 | 881 | if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0) |
924bd88d JM |
882 | goto out; |
883 | ||
7988613b | 884 | update_position(&index, &offset, &bvec); |
a1dd52af | 885 | } |
306b0c95 | 886 | |
4246a0b6 | 887 | bio_endio(bio); |
7d7854b4 | 888 | return; |
306b0c95 NG |
889 | |
890 | out: | |
306b0c95 | 891 | bio_io_error(bio); |
306b0c95 NG |
892 | } |
893 | ||
306b0c95 | 894 | /* |
f1e3cfff | 895 | * Handler function for all zram I/O requests. |
306b0c95 | 896 | */ |
dece1635 | 897 | static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) |
306b0c95 | 898 | { |
f1e3cfff | 899 | struct zram *zram = queue->queuedata; |
306b0c95 | 900 | |
08eee69f | 901 | if (unlikely(!zram_meta_get(zram))) |
3de738cd | 902 | goto error; |
0900beae | 903 | |
54efd50b KO |
904 | blk_queue_split(queue, &bio, queue->bio_split); |
905 | ||
54850e73 | 906 | if (!valid_io_request(zram, bio->bi_iter.bi_sector, |
907 | bio->bi_iter.bi_size)) { | |
da5cc7d3 | 908 | atomic64_inc(&zram->stats.invalid_io); |
08eee69f | 909 | goto put_zram; |
6642a67c JM |
910 | } |
911 | ||
be257c61 | 912 | __zram_make_request(zram, bio); |
08eee69f | 913 | zram_meta_put(zram); |
dece1635 | 914 | return BLK_QC_T_NONE; |
08eee69f MK |
915 | put_zram: |
916 | zram_meta_put(zram); | |
0900beae JM |
917 | error: |
918 | bio_io_error(bio); | |
dece1635 | 919 | return BLK_QC_T_NONE; |
306b0c95 NG |
920 | } |
921 | ||
2ccbec05 NG |
922 | static void zram_slot_free_notify(struct block_device *bdev, |
923 | unsigned long index) | |
107c161b | 924 | { |
f1e3cfff | 925 | struct zram *zram; |
f614a9f4 | 926 | struct zram_meta *meta; |
107c161b | 927 | |
f1e3cfff | 928 | zram = bdev->bd_disk->private_data; |
f614a9f4 | 929 | meta = zram->meta; |
a0c516cb | 930 | |
d2d5e762 | 931 | bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); |
f614a9f4 | 932 | zram_free_page(zram, index); |
d2d5e762 | 933 | bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); |
f614a9f4 | 934 | atomic64_inc(&zram->stats.notify_free); |
107c161b NG |
935 | } |
936 | ||
8c7f0102 | 937 | static int zram_rw_page(struct block_device *bdev, sector_t sector, |
938 | struct page *page, int rw) | |
939 | { | |
08eee69f | 940 | int offset, err = -EIO; |
8c7f0102 | 941 | u32 index; |
942 | struct zram *zram; | |
943 | struct bio_vec bv; | |
944 | ||
945 | zram = bdev->bd_disk->private_data; | |
08eee69f MK |
946 | if (unlikely(!zram_meta_get(zram))) |
947 | goto out; | |
948 | ||
8c7f0102 | 949 | if (!valid_io_request(zram, sector, PAGE_SIZE)) { |
950 | atomic64_inc(&zram->stats.invalid_io); | |
08eee69f MK |
951 | err = -EINVAL; |
952 | goto put_zram; | |
8c7f0102 | 953 | } |
954 | ||
955 | index = sector >> SECTORS_PER_PAGE_SHIFT; | |
956 | offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; | |
957 | ||
958 | bv.bv_page = page; | |
959 | bv.bv_len = PAGE_SIZE; | |
960 | bv.bv_offset = 0; | |
961 | ||
962 | err = zram_bvec_rw(zram, &bv, index, offset, rw); | |
08eee69f MK |
963 | put_zram: |
964 | zram_meta_put(zram); | |
965 | out: | |
8c7f0102 | 966 | /* |
967 | * If I/O fails, just return an error (i.e. non-zero) without |
968 | * calling page_endio. |
969 | * This makes the upper layers of rw_page (e.g., swap_readpage, |
970 | * __swap_writepage) resubmit the I/O as a bio request, and |
971 | * bio->bi_end_io then does the error handling |
972 | * (e.g., SetPageError, set_page_dirty and extra work). |
973 | */ | |
974 | if (err == 0) | |
975 | page_endio(page, rw, 0); | |
976 | return err; | |
977 | } | |
978 | ||
522698d7 SS |
979 | static void zram_reset_device(struct zram *zram) |
980 | { | |
981 | struct zram_meta *meta; | |
982 | struct zcomp *comp; | |
983 | u64 disksize; | |
306b0c95 | 984 | |
522698d7 | 985 | down_write(&zram->init_lock); |
9b3bb7ab | 986 | |
522698d7 SS |
987 | zram->limit_pages = 0; |
988 | ||
989 | if (!init_done(zram)) { | |
990 | up_write(&zram->init_lock); | |
991 | return; | |
992 | } | |
993 | ||
994 | meta = zram->meta; | |
995 | comp = zram->comp; | |
996 | disksize = zram->disksize; | |
997 | /* | |
998 | * Refcount will go down to 0 eventually and r/w handler | |
999 | * cannot handle further I/O so it will bail out by | |
1000 | * checking zram_meta_get. |
1001 | */ | |
1002 | zram_meta_put(zram); | |
1003 | /* | |
1004 | * We want to free zram_meta in process context to avoid | |
1005 | * deadlock between reclaim path and any other locks. | |
1006 | */ | |
1007 | wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); | |
1008 | ||
1009 | /* Reset stats */ | |
1010 | memset(&zram->stats, 0, sizeof(zram->stats)); | |
1011 | zram->disksize = 0; | |
1012 | zram->max_comp_streams = 1; | |
1013 | ||
1014 | set_capacity(zram->disk, 0); | |
1015 | part_stat_set_all(&zram->disk->part0, 0); | |
1016 | ||
1017 | up_write(&zram->init_lock); | |
1018 | /* I/O operations on all CPUs are done, so it is safe to free */ |
1019 | zram_meta_free(meta, disksize); | |
1020 | zcomp_destroy(comp); | |
1021 | } | |
1022 | ||
1023 | static ssize_t disksize_store(struct device *dev, | |
1024 | struct device_attribute *attr, const char *buf, size_t len) | |
2f6a3bed | 1025 | { |
522698d7 SS |
1026 | u64 disksize; |
1027 | struct zcomp *comp; | |
1028 | struct zram_meta *meta; | |
2f6a3bed | 1029 | struct zram *zram = dev_to_zram(dev); |
522698d7 | 1030 | int err; |
2f6a3bed | 1031 | |
522698d7 SS |
1032 | disksize = memparse(buf, NULL); |
1033 | if (!disksize) | |
1034 | return -EINVAL; | |
2f6a3bed | 1035 | |
522698d7 | 1036 | disksize = PAGE_ALIGN(disksize); |
4ce321f5 | 1037 | meta = zram_meta_alloc(zram->disk->disk_name, disksize); |
522698d7 SS |
1038 | if (!meta) |
1039 | return -ENOMEM; | |
1040 | ||
1041 | comp = zcomp_create(zram->compressor, zram->max_comp_streams); | |
1042 | if (IS_ERR(comp)) { | |
70864969 | 1043 | pr_err("Cannot initialise %s compressing backend\n", |
522698d7 SS |
1044 | zram->compressor); |
1045 | err = PTR_ERR(comp); | |
1046 | goto out_free_meta; | |
1047 | } | |
1048 | ||
1049 | down_write(&zram->init_lock); | |
1050 | if (init_done(zram)) { | |
1051 | pr_info("Cannot change disksize for initialized device\n"); | |
1052 | err = -EBUSY; | |
1053 | goto out_destroy_comp; | |
1054 | } | |
1055 | ||
1056 | init_waitqueue_head(&zram->io_done); | |
1057 | atomic_set(&zram->refcount, 1); | |
1058 | zram->meta = meta; | |
1059 | zram->comp = comp; | |
1060 | zram->disksize = disksize; | |
1061 | set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); | |
1062 | up_write(&zram->init_lock); | |
1063 | ||
1064 | /* | |
1065 | * Revalidate disk out of the init_lock to avoid lockdep splat. | |
1066 | * It's okay because disk's capacity is protected by init_lock | |
1067 | * so that revalidate_disk always sees up-to-date capacity. | |
1068 | */ | |
1069 | revalidate_disk(zram->disk); | |
1070 | ||
1071 | return len; | |
1072 | ||
1073 | out_destroy_comp: | |
1074 | up_write(&zram->init_lock); | |
1075 | zcomp_destroy(comp); | |
1076 | out_free_meta: | |
1077 | zram_meta_free(meta, disksize); | |
1078 | return err; | |
2f6a3bed SS |
1079 | } |
1080 | ||
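Taken together, comp_algorithm_store() and disksize_store() are what a one-shot setup of /dev/zram0 exercises from user space. A minimal sketch (needs root; paths follow the sysfs layout created by this driver, error handling is trimmed, and lz4 is only an example algorithm):

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void put(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0)
		perror(path);
	if (fd >= 0)
		close(fd);
}

int main(void)
{
	/* the algorithm must be chosen before disksize, see comp_algorithm_store() */
	put("/sys/block/zram0/comp_algorithm", "lz4");
	put("/sys/block/zram0/disksize", "268435456");	/* 256 MiB */
	puts("zram0 configured; mkfs or mkswap it as usual");
	return 0;
}
```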
522698d7 SS |
1081 | static ssize_t reset_store(struct device *dev, |
1082 | struct device_attribute *attr, const char *buf, size_t len) | |
4f2109f6 | 1083 | { |
522698d7 SS |
1084 | int ret; |
1085 | unsigned short do_reset; | |
1086 | struct zram *zram; | |
1087 | struct block_device *bdev; | |
4f2109f6 | 1088 | |
f405c445 SS |
1089 | ret = kstrtou16(buf, 10, &do_reset); |
1090 | if (ret) | |
1091 | return ret; | |
1092 | ||
1093 | if (!do_reset) | |
1094 | return -EINVAL; | |
1095 | ||
522698d7 SS |
1096 | zram = dev_to_zram(dev); |
1097 | bdev = bdget_disk(zram->disk, 0); | |
522698d7 SS |
1098 | if (!bdev) |
1099 | return -ENOMEM; | |
4f2109f6 | 1100 | |
522698d7 | 1101 | mutex_lock(&bdev->bd_mutex); |
f405c445 SS |
1102 | /* Do not reset an active device or claimed device */ |
1103 | if (bdev->bd_openers || zram->claim) { | |
1104 | mutex_unlock(&bdev->bd_mutex); | |
1105 | bdput(bdev); | |
1106 | return -EBUSY; | |
522698d7 SS |
1107 | } |
1108 | ||
f405c445 SS |
1109 | /* From now on, no one can open /dev/zram[0-9] */ |
1110 | zram->claim = true; | |
1111 | mutex_unlock(&bdev->bd_mutex); | |
522698d7 | 1112 | |
f405c445 | 1113 | /* Make sure all the pending I/O is finished */ |
522698d7 SS |
1114 | fsync_bdev(bdev); |
1115 | zram_reset_device(zram); | |
522698d7 SS |
1116 | revalidate_disk(zram->disk); |
1117 | bdput(bdev); | |
1118 | ||
f405c445 SS |
1119 | mutex_lock(&bdev->bd_mutex); |
1120 | zram->claim = false; | |
1121 | mutex_unlock(&bdev->bd_mutex); | |
1122 | ||
522698d7 | 1123 | return len; |
f405c445 SS |
1124 | } |
1125 | ||
1126 | static int zram_open(struct block_device *bdev, fmode_t mode) | |
1127 | { | |
1128 | int ret = 0; | |
1129 | struct zram *zram; | |
1130 | ||
1131 | WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); | |
1132 | ||
1133 | zram = bdev->bd_disk->private_data; | |
1134 | /* zram was claimed for reset, so the open request fails */ |
1135 | if (zram->claim) | |
1136 | ret = -EBUSY; | |
4f2109f6 SS |
1137 | |
1138 | return ret; | |
1139 | } | |
1140 | ||
522698d7 | 1141 | static const struct block_device_operations zram_devops = { |
f405c445 | 1142 | .open = zram_open, |
522698d7 SS |
1143 | .swap_slot_free_notify = zram_slot_free_notify, |
1144 | .rw_page = zram_rw_page, | |
1145 | .owner = THIS_MODULE | |
1146 | }; | |
1147 | ||
1148 | static DEVICE_ATTR_WO(compact); | |
1149 | static DEVICE_ATTR_RW(disksize); | |
1150 | static DEVICE_ATTR_RO(initstate); | |
1151 | static DEVICE_ATTR_WO(reset); | |
1152 | static DEVICE_ATTR_RO(orig_data_size); | |
1153 | static DEVICE_ATTR_RO(mem_used_total); | |
1154 | static DEVICE_ATTR_RW(mem_limit); | |
1155 | static DEVICE_ATTR_RW(mem_used_max); | |
1156 | static DEVICE_ATTR_RW(max_comp_streams); | |
1157 | static DEVICE_ATTR_RW(comp_algorithm); | |
a68eb3b6 | 1158 | |
9b3bb7ab SS |
1159 | static struct attribute *zram_disk_attrs[] = { |
1160 | &dev_attr_disksize.attr, | |
1161 | &dev_attr_initstate.attr, | |
1162 | &dev_attr_reset.attr, | |
1163 | &dev_attr_num_reads.attr, | |
1164 | &dev_attr_num_writes.attr, | |
64447249 SS |
1165 | &dev_attr_failed_reads.attr, |
1166 | &dev_attr_failed_writes.attr, | |
99ebbd30 | 1167 | &dev_attr_compact.attr, |
9b3bb7ab SS |
1168 | &dev_attr_invalid_io.attr, |
1169 | &dev_attr_notify_free.attr, | |
1170 | &dev_attr_zero_pages.attr, | |
1171 | &dev_attr_orig_data_size.attr, | |
1172 | &dev_attr_compr_data_size.attr, | |
1173 | &dev_attr_mem_used_total.attr, | |
9ada9da9 | 1174 | &dev_attr_mem_limit.attr, |
461a8eee | 1175 | &dev_attr_mem_used_max.attr, |
beca3ec7 | 1176 | &dev_attr_max_comp_streams.attr, |
e46b8a03 | 1177 | &dev_attr_comp_algorithm.attr, |
2f6a3bed | 1178 | &dev_attr_io_stat.attr, |
4f2109f6 | 1179 | &dev_attr_mm_stat.attr, |
9b3bb7ab SS |
1180 | NULL, |
1181 | }; | |
1182 | ||
1183 | static struct attribute_group zram_disk_attr_group = { | |
1184 | .attrs = zram_disk_attrs, | |
1185 | }; | |
1186 | ||
92ff1528 SS |
1187 | /* |
1188 | * Allocate and initialize a new zram device. The function returns |
1189 | * a '>= 0' device_id upon success, and a negative value otherwise. |
1190 | */ | |
1191 | static int zram_add(void) | |
306b0c95 | 1192 | { |
85508ec6 | 1193 | struct zram *zram; |
ee980160 | 1194 | struct request_queue *queue; |
92ff1528 | 1195 | int ret, device_id; |
85508ec6 SS |
1196 | |
1197 | zram = kzalloc(sizeof(struct zram), GFP_KERNEL); | |
1198 | if (!zram) | |
1199 | return -ENOMEM; | |
1200 | ||
92ff1528 | 1201 | ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); |
85508ec6 SS |
1202 | if (ret < 0) |
1203 | goto out_free_dev; | |
92ff1528 | 1204 | device_id = ret; |
de1a21a0 | 1205 | |
0900beae | 1206 | init_rwsem(&zram->init_lock); |
306b0c95 | 1207 | |
ee980160 SS |
1208 | queue = blk_alloc_queue(GFP_KERNEL); |
1209 | if (!queue) { | |
306b0c95 NG |
1210 | pr_err("Error allocating disk queue for device %d\n", |
1211 | device_id); | |
85508ec6 SS |
1212 | ret = -ENOMEM; |
1213 | goto out_free_idr; | |
306b0c95 NG |
1214 | } |
1215 | ||
ee980160 | 1216 | blk_queue_make_request(queue, zram_make_request); |
306b0c95 | 1217 | |
85508ec6 | 1218 | /* gendisk structure */ |
f1e3cfff NG |
1219 | zram->disk = alloc_disk(1); |
1220 | if (!zram->disk) { | |
70864969 | 1221 | pr_err("Error allocating disk structure for device %d\n", |
306b0c95 | 1222 | device_id); |
201c7b72 | 1223 | ret = -ENOMEM; |
39a9b8ac | 1224 | goto out_free_queue; |
306b0c95 NG |
1225 | } |
1226 | ||
f1e3cfff NG |
1227 | zram->disk->major = zram_major; |
1228 | zram->disk->first_minor = device_id; | |
1229 | zram->disk->fops = &zram_devops; | |
ee980160 SS |
1230 | zram->disk->queue = queue; |
1231 | zram->disk->queue->queuedata = zram; | |
f1e3cfff NG |
1232 | zram->disk->private_data = zram; |
1233 | snprintf(zram->disk->disk_name, 16, "zram%d", device_id); | |
306b0c95 | 1234 | |
33863c21 | 1235 | /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */ |
f1e3cfff | 1236 | set_capacity(zram->disk, 0); |
b67d1ec1 SS |
1237 | /* zram devices sort of resemble non-rotational disks */ |
1238 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); | |
b277da0a | 1239 | queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); |
a1dd52af NG |
1240 | /* |
1241 | * To ensure that we always get PAGE_SIZE aligned | |
1242 | * and n*PAGE_SIZED sized I/O requests. | |
1243 | */ | |
f1e3cfff | 1244 | blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); |
7b19b8d4 RJ |
1245 | blk_queue_logical_block_size(zram->disk->queue, |
1246 | ZRAM_LOGICAL_BLOCK_SIZE); | |
f1e3cfff NG |
1247 | blk_queue_io_min(zram->disk->queue, PAGE_SIZE); |
1248 | blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); | |
f4659d8e | 1249 | zram->disk->queue->limits.discard_granularity = PAGE_SIZE; |
2bb4cd5c | 1250 | blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); |
f4659d8e JK |
1251 | /* |
1252 | * zram_bio_discard() will clear all logical blocks if logical block | |
1253 | * size is identical to the physical block size (PAGE_SIZE). But if it is |
1254 | * different, we will skip discarding some parts of logical blocks in | |
1255 | * the part of the request range which isn't aligned to physical block | |
1256 | * size. So we can't ensure that all discarded logical blocks are | |
1257 | * zeroed. | |
1258 | */ | |
1259 | if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) | |
1260 | zram->disk->queue->limits.discard_zeroes_data = 1; | |
1261 | else | |
1262 | zram->disk->queue->limits.discard_zeroes_data = 0; | |
1263 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); | |
5d83d5a0 | 1264 | |
f1e3cfff | 1265 | add_disk(zram->disk); |
306b0c95 | 1266 | |
33863c21 NG |
1267 | ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, |
1268 | &zram_disk_attr_group); | |
1269 | if (ret < 0) { | |
70864969 SS |
1270 | pr_err("Error creating sysfs group for device %d\n", |
1271 | device_id); | |
39a9b8ac | 1272 | goto out_free_disk; |
33863c21 | 1273 | } |
e46b8a03 | 1274 | strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); |
be2d1d56 | 1275 | zram->meta = NULL; |
beca3ec7 | 1276 | zram->max_comp_streams = 1; |
d12b63c9 SS |
1277 | |
1278 | pr_info("Added device: %s\n", zram->disk->disk_name); | |
92ff1528 | 1279 | return device_id; |
de1a21a0 | 1280 | |
39a9b8ac JL |
1281 | out_free_disk: |
1282 | del_gendisk(zram->disk); | |
1283 | put_disk(zram->disk); | |
1284 | out_free_queue: | |
ee980160 | 1285 | blk_cleanup_queue(queue); |
85508ec6 SS |
1286 | out_free_idr: |
1287 | idr_remove(&zram_index_idr, device_id); | |
1288 | out_free_dev: | |
1289 | kfree(zram); | |
de1a21a0 | 1290 | return ret; |
306b0c95 NG |
1291 | } |
1292 | ||
6566d1a3 | 1293 | static int zram_remove(struct zram *zram) |
306b0c95 | 1294 | { |
6566d1a3 SS |
1295 | struct block_device *bdev; |
1296 | ||
1297 | bdev = bdget_disk(zram->disk, 0); | |
1298 | if (!bdev) | |
1299 | return -ENOMEM; | |
1300 | ||
1301 | mutex_lock(&bdev->bd_mutex); | |
1302 | if (bdev->bd_openers || zram->claim) { | |
1303 | mutex_unlock(&bdev->bd_mutex); | |
1304 | bdput(bdev); | |
1305 | return -EBUSY; | |
1306 | } | |
1307 | ||
1308 | zram->claim = true; | |
1309 | mutex_unlock(&bdev->bd_mutex); | |
1310 | ||
85508ec6 SS |
1311 | /* |
1312 | * Remove sysfs first, so no one will perform a disksize | |
6566d1a3 SS |
1313 | * store while we destroy the devices. This also helps during |
1314 | * hot_remove -- zram_reset_device() is the last holder of | |
1315 | * ->init_lock, no later/concurrent disksize_store() or any | |
1316 | * other sysfs handlers are possible. | |
85508ec6 SS |
1317 | */ |
1318 | sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, | |
1319 | &zram_disk_attr_group); | |
306b0c95 | 1320 | |
6566d1a3 SS |
1321 | /* Make sure all the pending I/O is finished */ |
1322 | fsync_bdev(bdev); | |
85508ec6 | 1323 | zram_reset_device(zram); |
6566d1a3 SS |
1324 | bdput(bdev); |
1325 | ||
1326 | pr_info("Removed device: %s\n", zram->disk->disk_name); | |
1327 | ||
85508ec6 SS |
1328 | idr_remove(&zram_index_idr, zram->disk->first_minor); |
1329 | blk_cleanup_queue(zram->disk->queue); | |
1330 | del_gendisk(zram->disk); | |
1331 | put_disk(zram->disk); | |
1332 | kfree(zram); | |
6566d1a3 SS |
1333 | return 0; |
1334 | } | |
1335 | ||
1336 | /* zram-control sysfs attributes */ | |
1337 | static ssize_t hot_add_show(struct class *class, | |
1338 | struct class_attribute *attr, | |
1339 | char *buf) | |
1340 | { | |
1341 | int ret; | |
1342 | ||
1343 | mutex_lock(&zram_index_mutex); | |
1344 | ret = zram_add(); | |
1345 | mutex_unlock(&zram_index_mutex); | |
1346 | ||
1347 | if (ret < 0) | |
1348 | return ret; | |
1349 | return scnprintf(buf, PAGE_SIZE, "%d\n", ret); | |
1350 | } | |
1351 | ||
1352 | static ssize_t hot_remove_store(struct class *class, | |
1353 | struct class_attribute *attr, | |
1354 | const char *buf, | |
1355 | size_t count) | |
1356 | { | |
1357 | struct zram *zram; | |
1358 | int ret, dev_id; | |
1359 | ||
1360 | /* dev_id is gendisk->first_minor, which is `int' */ | |
1361 | ret = kstrtoint(buf, 10, &dev_id); | |
1362 | if (ret) | |
1363 | return ret; | |
1364 | if (dev_id < 0) | |
1365 | return -EINVAL; | |
1366 | ||
1367 | mutex_lock(&zram_index_mutex); | |
1368 | ||
1369 | zram = idr_find(&zram_index_idr, dev_id); | |
1370 | if (zram) | |
1371 | ret = zram_remove(zram); | |
1372 | else | |
1373 | ret = -ENODEV; | |
1374 | ||
1375 | mutex_unlock(&zram_index_mutex); | |
1376 | return ret ? ret : count; | |
85508ec6 | 1377 | } |
a096cafc | 1378 | |
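hot_add_show() and hot_remove_store() give user space dynamic device management: reading hot_add allocates a device and returns its id, writing an id to hot_remove tears it down. A small sketch of that round trip (needs root; file names follow the zram-control class registered below, the rest is illustrative):

```c
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[16] = "";
	int fd = open("/sys/class/zram-control/hot_add", O_RDONLY);

	if (fd < 0)
		return 1;
	/* the read itself creates the device; the returned text is its id */
	if (read(fd, buf, sizeof(buf) - 1) > 0)
		printf("created /dev/zram%d\n", atoi(buf));
	close(fd);

	/* tear it down again by writing the id to hot_remove */
	fd = open("/sys/class/zram-control/hot_remove", O_WRONLY);
	if (fd >= 0) {
		write(fd, buf, strlen(buf));
		close(fd);
	}
	return 0;
}
```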
6566d1a3 SS |
1379 | static struct class_attribute zram_control_class_attrs[] = { |
1380 | __ATTR_RO(hot_add), | |
1381 | __ATTR_WO(hot_remove), | |
1382 | __ATTR_NULL, | |
1383 | }; | |
1384 | ||
1385 | static struct class zram_control_class = { | |
1386 | .name = "zram-control", | |
1387 | .owner = THIS_MODULE, | |
1388 | .class_attrs = zram_control_class_attrs, | |
1389 | }; | |
1390 | ||
85508ec6 SS |
1391 | static int zram_remove_cb(int id, void *ptr, void *data) |
1392 | { | |
1393 | zram_remove(ptr); | |
1394 | return 0; | |
1395 | } | |
a096cafc | 1396 | |
85508ec6 SS |
1397 | static void destroy_devices(void) |
1398 | { | |
6566d1a3 | 1399 | class_unregister(&zram_control_class); |
85508ec6 SS |
1400 | idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); |
1401 | idr_destroy(&zram_index_idr); | |
a096cafc | 1402 | unregister_blkdev(zram_major, "zram"); |
306b0c95 NG |
1403 | } |
1404 | ||
f1e3cfff | 1405 | static int __init zram_init(void) |
306b0c95 | 1406 | { |
92ff1528 | 1407 | int ret; |
306b0c95 | 1408 | |
6566d1a3 SS |
1409 | ret = class_register(&zram_control_class); |
1410 | if (ret) { | |
70864969 | 1411 | pr_err("Unable to register zram-control class\n"); |
6566d1a3 SS |
1412 | return ret; |
1413 | } | |
1414 | ||
f1e3cfff NG |
1415 | zram_major = register_blkdev(0, "zram"); |
1416 | if (zram_major <= 0) { | |
70864969 | 1417 | pr_err("Unable to get major number\n"); |
6566d1a3 | 1418 | class_unregister(&zram_control_class); |
a096cafc | 1419 | return -EBUSY; |
306b0c95 NG |
1420 | } |
1421 | ||
92ff1528 | 1422 | while (num_devices != 0) { |
6566d1a3 | 1423 | mutex_lock(&zram_index_mutex); |
92ff1528 | 1424 | ret = zram_add(); |
6566d1a3 | 1425 | mutex_unlock(&zram_index_mutex); |
92ff1528 | 1426 | if (ret < 0) |
a096cafc | 1427 | goto out_error; |
92ff1528 | 1428 | num_devices--; |
de1a21a0 NG |
1429 | } |
1430 | ||
306b0c95 | 1431 | return 0; |
de1a21a0 | 1432 | |
a096cafc | 1433 | out_error: |
85508ec6 | 1434 | destroy_devices(); |
306b0c95 NG |
1435 | return ret; |
1436 | } | |
1437 | ||
f1e3cfff | 1438 | static void __exit zram_exit(void) |
306b0c95 | 1439 | { |
85508ec6 | 1440 | destroy_devices(); |
306b0c95 NG |
1441 | } |
1442 | ||
f1e3cfff NG |
1443 | module_init(zram_init); |
1444 | module_exit(zram_exit); | |
306b0c95 | 1445 | |
9b3bb7ab | 1446 | module_param(num_devices, uint, 0); |
c3cdb40e | 1447 | MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); |
9b3bb7ab | 1448 | |
306b0c95 NG |
1449 | MODULE_LICENSE("Dual BSD/GPL"); |
1450 | MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); | |
f1e3cfff | 1451 | MODULE_DESCRIPTION("Compressed RAM Block Device"); |