drivers/block/zram/zram_drv.c
306b0c95 1/*
f1e3cfff 2 * Compressed RAM block device
306b0c95 3 *
1130ebba 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
7bfb3de8 5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
f1e3cfff 15#define KMSG_COMPONENT "zram"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/kernel.h>
8946a086 20#include <linux/bio.h>
21#include <linux/bitops.h>
22#include <linux/blkdev.h>
23#include <linux/buffer_head.h>
24#include <linux/device.h>
306b0c95 25#include <linux/highmem.h>
5a0e3ad6 26#include <linux/slab.h>
b09ab054 27#include <linux/backing-dev.h>
306b0c95 28#include <linux/string.h>
306b0c95 29#include <linux/vmalloc.h>
fcfa8d95 30#include <linux/err.h>
85508ec6 31#include <linux/idr.h>
6566d1a3 32#include <linux/sysfs.h>
c0265342 33#include <linux/debugfs.h>
1dd6c834 34#include <linux/cpuhotplug.h>
c6a564ff 35#include <linux/part_stat.h>
306b0c95 36
16a4bfb9 37#include "zram_drv.h"
306b0c95 38
85508ec6 39static DEFINE_IDR(zram_index_idr);
40/* idr index must be protected */
41static DEFINE_MUTEX(zram_index_mutex);
42
f1e3cfff 43static int zram_major;
3d711a38 44static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;
306b0c95 45
306b0c95 46/* Module params (documentation at end) */
ca3d70bd 47static unsigned int num_devices = 1;
48/*
49 * Pages that compress to sizes equal to or greater than this are stored
50 * uncompressed in memory.
51 */
52static size_t huge_class_size;
33863c21 53
a8b456d0 54static const struct block_device_operations zram_devops;
13c1c74a 55#ifdef CONFIG_ZRAM_WRITEBACK
a8b456d0 56static const struct block_device_operations zram_wb_devops;
13c1c74a 57#endif
a8b456d0 58
1f7319c7 59static void zram_free_page(struct zram *zram, size_t index);
60static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
61 u32 index, int offset, struct bio *bio);
62
1f7319c7 63
64static int zram_slot_trylock(struct zram *zram, u32 index)
65{
7e529283 66 return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
67}
68
69static void zram_slot_lock(struct zram *zram, u32 index)
70{
7e529283 71 bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
72}
73
74static void zram_slot_unlock(struct zram *zram, u32 index)
75{
7e529283 76 bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
77}
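/*
 * A note on the helpers above: the per-slot lock is simply the ZRAM_LOCK
 * bit inside table[index].flags, taken with bit_spin_lock(). There is no
 * separate lock word per entry; the same flags word also carries the
 * compressed object size and the other zram_pageflags bits.
 */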
78
08eee69f 79static inline bool init_done(struct zram *zram)
be2d1d56 80{
08eee69f 81 return zram->disksize;
82}
83
84static inline struct zram *dev_to_zram(struct device *dev)
85{
86 return (struct zram *)dev_to_disk(dev)->private_data;
87}
88
89static unsigned long zram_get_handle(struct zram *zram, u32 index)
90{
91 return zram->table[index].handle;
92}
93
94static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
95{
96 zram->table[index].handle = handle;
97}
98
b31177f2 99/* flag operations require table entry bit_spin_lock() being held */
c0265342 100static bool zram_test_flag(struct zram *zram, u32 index,
522698d7 101 enum zram_pageflags flag)
99ebbd30 102{
7e529283 103 return zram->table[index].flags & BIT(flag);
522698d7 104}
99ebbd30 105
beb6602c 106static void zram_set_flag(struct zram *zram, u32 index,
107 enum zram_pageflags flag)
108{
7e529283 109 zram->table[index].flags |= BIT(flag);
522698d7 110}
99ebbd30 111
beb6602c 112static void zram_clear_flag(struct zram *zram, u32 index,
113 enum zram_pageflags flag)
114{
7e529283 115 zram->table[index].flags &= ~BIT(flag);
522698d7 116}
99ebbd30 117
beb6602c 118static inline void zram_set_element(struct zram *zram, u32 index,
8e19d540 119 unsigned long element)
120{
beb6602c 121 zram->table[index].element = element;
8e19d540 122}
123
643ae61d 124static unsigned long zram_get_element(struct zram *zram, u32 index)
8e19d540 125{
643ae61d 126 return zram->table[index].element;
8e19d540 127}
128
beb6602c 129static size_t zram_get_obj_size(struct zram *zram, u32 index)
522698d7 130{
7e529283 131 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
132}
133
beb6602c 134static void zram_set_obj_size(struct zram *zram,
522698d7 135 u32 index, size_t size)
9b3bb7ab 136{
7e529283 137 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
9b3bb7ab 138
7e529283 139 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
140}
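/*
 * As the two helpers above imply, table[index].flags is split in two:
 * bits [0, ZRAM_FLAG_SHIFT) hold the compressed object size, and the
 * bits from ZRAM_FLAG_SHIFT upward hold the enum zram_pageflags bits
 * such as ZRAM_LOCK, ZRAM_SAME and ZRAM_WB.
 */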
141
142static inline bool zram_allocated(struct zram *zram, u32 index)
143{
144 return zram_get_obj_size(zram, index) ||
145 zram_test_flag(zram, index, ZRAM_SAME) ||
146 zram_test_flag(zram, index, ZRAM_WB);
147}
148
1f7319c7 149#if PAGE_SIZE != 4096
1c53e0d2 150static inline bool is_partial_io(struct bio_vec *bvec)
151{
152 return bvec->bv_len != PAGE_SIZE;
153}
154#else
155static inline bool is_partial_io(struct bio_vec *bvec)
156{
157 return false;
158}
159#endif
160
161/*
162 * Check if request is within bounds and aligned on zram logical blocks.
163 */
1c53e0d2 164static inline bool valid_io_request(struct zram *zram,
165 sector_t start, unsigned int size)
166{
167 u64 end, bound;
168
169 /* unaligned request */
170 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
1c53e0d2 171 return false;
522698d7 172 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
1c53e0d2 173 return false;
174
175 end = start + (size >> SECTOR_SHIFT);
176 bound = zram->disksize >> SECTOR_SHIFT;
177 /* out of range */
178 if (unlikely(start >= bound || end > bound || start > end))
1c53e0d2 179 return false;
180
181 /* I/O request is valid */
1c53e0d2 182 return true;
183}
184
185static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
186{
e86942c7 187 *index += (*offset + bvec->bv_len) / PAGE_SIZE;
188 *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
189}
190
191static inline void update_used_max(struct zram *zram,
192 const unsigned long pages)
193{
194 unsigned long old_max, cur_max;
195
196 old_max = atomic_long_read(&zram->stats.max_used_pages);
197
198 do {
199 cur_max = old_max;
200 if (pages > cur_max)
201 old_max = atomic_long_cmpxchg(
202 &zram->stats.max_used_pages, cur_max, pages);
203 } while (old_max != cur_max);
204}
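/*
 * update_used_max() above is a lock-free "record the peak" loop: it keeps
 * retrying atomic_long_cmpxchg() on stats.max_used_pages until either the
 * stored maximum is already >= pages or the compare-and-swap succeeds, so
 * peak memory usage is tracked without taking any lock.
 */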
205
48ad1abe 206static inline void zram_fill_page(void *ptr, unsigned long len,
8e19d540 207 unsigned long value)
208{
8e19d540 209 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
48ad1abe 210 memset_l(ptr, value, len / sizeof(unsigned long));
8e19d540 211}
212
213static bool page_same_filled(void *ptr, unsigned long *element)
522698d7 214{
522698d7 215 unsigned long *page;
f0fe9984 216 unsigned long val;
90f82cbf 217 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;
218
219 page = (unsigned long *)ptr;
f0fe9984 220 val = page[0];
522698d7 221
222 if (val != page[last_pos])
223 return false;
224
225 for (pos = 1; pos < last_pos; pos++) {
f0fe9984 226 if (val != page[pos])
1c53e0d2 227 return false;
228 }
229
f0fe9984 230 *element = val;
8e19d540 231
1c53e0d2 232 return true;
233}
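/*
 * page_same_filled() above treats the page as an array of unsigned longs
 * and reports pages whose words are all identical (not just zero-filled
 * pages); the repeated value is returned through *element so it can be
 * stored in the slot instead of a zsmalloc handle. Comparing the last
 * word first is a cheap early exit for the common non-matching case.
 */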
234
235static ssize_t initstate_show(struct device *dev,
236 struct device_attribute *attr, char *buf)
237{
a68eb3b6 238 u32 val;
239 struct zram *zram = dev_to_zram(dev);
240
241 down_read(&zram->init_lock);
242 val = init_done(zram);
243 up_read(&zram->init_lock);
9b3bb7ab 244
56b4e8cb 245 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
246}
247
248static ssize_t disksize_show(struct device *dev,
249 struct device_attribute *attr, char *buf)
250{
251 struct zram *zram = dev_to_zram(dev);
252
253 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
254}
255
256static ssize_t mem_limit_store(struct device *dev,
257 struct device_attribute *attr, const char *buf, size_t len)
258{
259 u64 limit;
260 char *tmp;
261 struct zram *zram = dev_to_zram(dev);
262
263 limit = memparse(buf, &tmp);
264 if (buf == tmp) /* no chars parsed, invalid input */
265 return -EINVAL;
266
267 down_write(&zram->init_lock);
268 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
269 up_write(&zram->init_lock);
270
271 return len;
272}
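/*
 * Usage sketch for the mem_limit attribute (assumes a zram0 device):
 *
 *	echo 256M > /sys/block/zram0/mem_limit	# cap zsmalloc memory usage
 *	echo 0 > /sys/block/zram0/mem_limit	# remove the limit again
 *
 * memparse() accepts the usual K/M/G suffixes; a value of 0 leaves
 * limit_pages at 0, i.e. no limit is enforced.
 */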
273
274static ssize_t mem_used_max_store(struct device *dev,
275 struct device_attribute *attr, const char *buf, size_t len)
276{
277 int err;
278 unsigned long val;
279 struct zram *zram = dev_to_zram(dev);
280
281 err = kstrtoul(buf, 10, &val);
282 if (err || val != 0)
283 return -EINVAL;
284
285 down_read(&zram->init_lock);
5a99e95b 286 if (init_done(zram)) {
461a8eee 287 atomic_long_set(&zram->stats.max_used_pages,
beb6602c 288 zs_get_total_pages(zram->mem_pool));
5a99e95b 289 }
290 up_read(&zram->init_lock);
291
292 return len;
293}
294
295/*
296 * Mark all pages which are older than or equal to cutoff as IDLE.
297 * Callers should hold the zram init lock in read mode
298 */
299static void mark_idle(struct zram *zram, ktime_t cutoff)
e82592c4 300{
755804d1 301 int is_idle = 1;
302 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
303 int index;
e82592c4 304
e82592c4 305 for (index = 0; index < nr_pages; index++) {
306 /*
307 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
308 * See the comment in writeback_store.
309 */
e82592c4 310 zram_slot_lock(zram, index);
1d69a3f8 311 if (zram_allocated(zram, index) &&
312 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
313#ifdef CONFIG_ZRAM_MEMORY_TRACKING
314 is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
315#endif
316 if (is_idle)
317 zram_set_flag(zram, index, ZRAM_IDLE);
318 }
319 zram_slot_unlock(zram, index);
320 }
755804d1 321}
e82592c4 322
323static ssize_t idle_store(struct device *dev,
324 struct device_attribute *attr, const char *buf, size_t len)
325{
326 struct zram *zram = dev_to_zram(dev);
327 ktime_t cutoff_time = 0;
328 ssize_t rv = -EINVAL;
e82592c4 329
330 if (!sysfs_streq(buf, "all")) {
331 /*
332 * If it did not parse as 'all', try to treat it as an integer when
333 * we have memory tracking enabled.
334 */
335 u64 age_sec;
336
337 if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
338 cutoff_time = ktime_sub(ktime_get_boottime(),
339 ns_to_ktime(age_sec * NSEC_PER_SEC));
340 else
341 goto out;
342 }
343
344 down_read(&zram->init_lock);
345 if (!init_done(zram))
346 goto out_unlock;
347
348 /* A cutoff_time of 0 marks everything as idle; this is the "all" behavior */
349 mark_idle(zram, cutoff_time);
350 rv = len;
351
352out_unlock:
353 up_read(&zram->init_lock);
354out:
355 return rv;
356}
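/*
 * Usage sketch for the idle attribute (assumes a zram0 device):
 *
 *	echo all > /sys/block/zram0/idle	# mark every allocated slot idle
 *	echo 300 > /sys/block/zram0/idle	# with CONFIG_ZRAM_MEMORY_TRACKING,
 *						# mark slots untouched for 300s
 *
 * A slot loses ZRAM_IDLE again as soon as it is read or written, see
 * zram_accessed().
 */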
357
013bf95a 358#ifdef CONFIG_ZRAM_WRITEBACK
359static ssize_t writeback_limit_enable_store(struct device *dev,
360 struct device_attribute *attr, const char *buf, size_t len)
361{
362 struct zram *zram = dev_to_zram(dev);
363 u64 val;
364 ssize_t ret = -EINVAL;
365
366 if (kstrtoull(buf, 10, &val))
367 return ret;
368
369 down_read(&zram->init_lock);
370 spin_lock(&zram->wb_limit_lock);
371 zram->wb_limit_enable = val;
372 spin_unlock(&zram->wb_limit_lock);
373 up_read(&zram->init_lock);
374 ret = len;
375
376 return ret;
377}
378
379static ssize_t writeback_limit_enable_show(struct device *dev,
380 struct device_attribute *attr, char *buf)
381{
382 bool val;
383 struct zram *zram = dev_to_zram(dev);
384
385 down_read(&zram->init_lock);
386 spin_lock(&zram->wb_limit_lock);
387 val = zram->wb_limit_enable;
388 spin_unlock(&zram->wb_limit_lock);
389 up_read(&zram->init_lock);
390
391 return scnprintf(buf, PAGE_SIZE, "%d\n", val);
392}
393
bb416d18
MK
394static ssize_t writeback_limit_store(struct device *dev,
395 struct device_attribute *attr, const char *buf, size_t len)
396{
397 struct zram *zram = dev_to_zram(dev);
398 u64 val;
399 ssize_t ret = -EINVAL;
400
401 if (kstrtoull(buf, 10, &val))
402 return ret;
403
404 down_read(&zram->init_lock);
1d69a3f8
MK
405 spin_lock(&zram->wb_limit_lock);
406 zram->bd_wb_limit = val;
407 spin_unlock(&zram->wb_limit_lock);
bb416d18
MK
408 up_read(&zram->init_lock);
409 ret = len;
410
411 return ret;
412}
413
414static ssize_t writeback_limit_show(struct device *dev,
415 struct device_attribute *attr, char *buf)
416{
417 u64 val;
418 struct zram *zram = dev_to_zram(dev);
419
420 down_read(&zram->init_lock);
1d69a3f8
MK
421 spin_lock(&zram->wb_limit_lock);
422 val = zram->bd_wb_limit;
423 spin_unlock(&zram->wb_limit_lock);
424 up_read(&zram->init_lock);
425
426 return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
427}
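/*
 * Note on units: bd_wb_limit is accounted in 4K blocks regardless of
 * PAGE_SIZE (writeback_store() below decrements it by
 * 1UL << (PAGE_SHIFT - 12) per written-back page), so the value written
 * here is a budget of 4K writeback blocks that applies while
 * wb_limit_enable is set.
 */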
428
013bf95a
MK
429static void reset_bdev(struct zram *zram)
430{
431 struct block_device *bdev;
432
7e529283 433 if (!zram->backing_dev)
013bf95a
MK
434 return;
435
436 bdev = zram->bdev;
013bf95a
MK
437 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
438 /* hope filp_close flushes all of the IO */
439 filp_close(zram->backing_dev, NULL);
440 zram->backing_dev = NULL;
013bf95a 441 zram->bdev = NULL;
a8b456d0 442 zram->disk->fops = &zram_devops;
1363d466
MK
443 kvfree(zram->bitmap);
444 zram->bitmap = NULL;
013bf95a
MK
445}
446
447static ssize_t backing_dev_show(struct device *dev,
448 struct device_attribute *attr, char *buf)
449{
f7daefe4 450 struct file *file;
013bf95a 451 struct zram *zram = dev_to_zram(dev);
013bf95a
MK
452 char *p;
453 ssize_t ret;
454
455 down_read(&zram->init_lock);
f7daefe4
C
456 file = zram->backing_dev;
457 if (!file) {
013bf95a
MK
458 memcpy(buf, "none\n", 5);
459 up_read(&zram->init_lock);
460 return 5;
461 }
462
463 p = file_path(file, buf, PAGE_SIZE - 1);
464 if (IS_ERR(p)) {
465 ret = PTR_ERR(p);
466 goto out;
467 }
468
469 ret = strlen(p);
470 memmove(buf, p, ret);
471 buf[ret++] = '\n';
472out:
473 up_read(&zram->init_lock);
474 return ret;
475}
476
477static ssize_t backing_dev_store(struct device *dev,
478 struct device_attribute *attr, const char *buf, size_t len)
479{
480 char *file_name;
c8bd134a 481 size_t sz;
013bf95a
MK
482 struct file *backing_dev = NULL;
483 struct inode *inode;
484 struct address_space *mapping;
ee763e21 485 unsigned int bitmap_sz;
1363d466 486 unsigned long nr_pages, *bitmap = NULL;
013bf95a
MK
487 struct block_device *bdev = NULL;
488 int err;
489 struct zram *zram = dev_to_zram(dev);
490
491 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
492 if (!file_name)
493 return -ENOMEM;
494
495 down_write(&zram->init_lock);
496 if (init_done(zram)) {
497 pr_info("Can't setup backing device for initialized device\n");
498 err = -EBUSY;
499 goto out;
500 }
501
c8bd134a
PK
502 strlcpy(file_name, buf, PATH_MAX);
503 /* ignore trailing newline */
504 sz = strlen(file_name);
505 if (sz > 0 && file_name[sz - 1] == '\n')
506 file_name[sz - 1] = 0x00;
013bf95a
MK
507
508 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
509 if (IS_ERR(backing_dev)) {
510 err = PTR_ERR(backing_dev);
511 backing_dev = NULL;
512 goto out;
513 }
514
515 mapping = backing_dev->f_mapping;
516 inode = mapping->host;
517
518 /* Only block devices are supported at the moment */
519 if (!S_ISBLK(inode->i_mode)) {
520 err = -ENOTBLK;
521 goto out;
522 }
523
0fc66c9d
CH
524 bdev = blkdev_get_by_dev(inode->i_rdev,
525 FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
526 if (IS_ERR(bdev)) {
527 err = PTR_ERR(bdev);
5547932d 528 bdev = NULL;
013bf95a 529 goto out;
5547932d 530 }
013bf95a 531
1363d466
MK
532 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
533 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
534 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
535 if (!bitmap) {
536 err = -ENOMEM;
537 goto out;
538 }
539
013bf95a
MK
540 reset_bdev(zram);
541
013bf95a
MK
542 zram->bdev = bdev;
543 zram->backing_dev = backing_dev;
1363d466
MK
544 zram->bitmap = bitmap;
545 zram->nr_pages = nr_pages;
546 /*
547 * With the writeback feature, zram does asynchronous IO, so it is no
548 * longer a synchronous device and the synchronous io flag must be
549 * dropped. Otherwise, an upper layer (e.g., swap) could wait for IO
550 * completion instead of submit-and-return, which makes the system sluggish.
551 * Furthermore, when the IO function returns (e.g., swap_readpage), the
552 * upper layer expects the IO to be done and may free the page while the
553 * IO is actually still in flight, which can finally lead to a
554 * use-after-free once the IO really completes.
555 */
a8b456d0 556 zram->disk->fops = &zram_wb_devops;
013bf95a
MK
557 up_write(&zram->init_lock);
558
559 pr_info("setup backing device %s\n", file_name);
560 kfree(file_name);
561
562 return len;
563out:
294ed6b9 564 kvfree(bitmap);
1363d466 565
566 if (bdev)
567 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
568
569 if (backing_dev)
570 filp_close(backing_dev, NULL);
571
572 up_write(&zram->init_lock);
573
574 kfree(file_name);
575
576 return err;
577}
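/*
 * Setup sketch for a writeback backing device (assumes zram0 and a spare
 * partition /dev/sdb1; must happen before disksize is written):
 *
 *	echo /dev/sdb1 > /sys/block/zram0/backing_dev
 *	echo 4G > /sys/block/zram0/disksize
 *
 * Only block devices are accepted (see the S_ISBLK() check above), and
 * the store fails with -EBUSY once the device is initialized.
 */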
578
7e529283 579static unsigned long alloc_block_bdev(struct zram *zram)
1363d466 580{
3c9959e0
MK
581 unsigned long blk_idx = 1;
582retry:
1363d466 583 /* skip bit 0 to avoid confusion with zram.handle == 0 */
3c9959e0
MK
584 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
585 if (blk_idx == zram->nr_pages)
1363d466 586 return 0;
1363d466 587
3c9959e0
MK
588 if (test_and_set_bit(blk_idx, zram->bitmap))
589 goto retry;
1363d466 590
23eddf39 591 atomic64_inc(&zram->stats.bd_count);
3c9959e0 592 return blk_idx;
1363d466
MK
593}
594
7e529283 595static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
1363d466
MK
596{
597 int was_set;
598
7e529283 599 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
1363d466 600 WARN_ON_ONCE(!was_set);
23eddf39 601 atomic64_dec(&zram->stats.bd_count);
602}
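/*
 * The two helpers above implement a minimal bitmap allocator over the
 * backing device: alloc_block_bdev() scans for a zero bit starting at 1
 * (bit 0 is skipped so that "no block" can be encoded as 0) and claims it
 * with test_and_set_bit(), retrying on races; free_block_bdev() simply
 * clears the bit and drops the bd_count stat.
 */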
603
384bc41f 604static void zram_page_end_io(struct bio *bio)
db8ffbd4 605{
263663cd 606 struct page *page = bio_first_page_all(bio);
db8ffbd4
MK
607
608 page_endio(page, op_is_write(bio_op(bio)),
609 blk_status_to_errno(bio->bi_status));
610 bio_put(bio);
611}
612
8e654f8f
MK
613/*
614 * Returns 1 if the submission is successful.
615 */
616static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
617 unsigned long entry, struct bio *parent)
618{
619 struct bio *bio;
620
07888c66
CH
621 bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
622 GFP_NOIO);
8e654f8f
MK
623 if (!bio)
624 return -ENOMEM;
625
626 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
8e654f8f
MK
627 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
628 bio_put(bio);
629 return -EIO;
630 }
631
07888c66 632 if (!parent)
8e654f8f 633 bio->bi_end_io = zram_page_end_io;
07888c66 634 else
8e654f8f 635 bio_chain(bio, parent);
8e654f8f
MK
636
637 submit_bio(bio);
638 return 1;
639}
640
0d835962
MK
641#define PAGE_WB_SIG "page_index="
642
643#define PAGE_WRITEBACK 0
30226b69
BG
644#define HUGE_WRITEBACK (1<<0)
645#define IDLE_WRITEBACK (1<<1)
a939888e 646
0d835962 647
a939888e
MK
648static ssize_t writeback_store(struct device *dev,
649 struct device_attribute *attr, const char *buf, size_t len)
650{
651 struct zram *zram = dev_to_zram(dev);
652 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
0d835962 653 unsigned long index = 0;
a939888e
MK
654 struct bio bio;
655 struct bio_vec bio_vec;
656 struct page *page;
3b82a051 657 ssize_t ret = len;
57e0076e 658 int mode, err;
a939888e
MK
659 unsigned long blk_idx = 0;
660
0bc9f5d1 661 if (sysfs_streq(buf, "idle"))
a939888e 662 mode = IDLE_WRITEBACK;
0bc9f5d1 663 else if (sysfs_streq(buf, "huge"))
a939888e 664 mode = HUGE_WRITEBACK;
30226b69
BG
665 else if (sysfs_streq(buf, "huge_idle"))
666 mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
0d835962
MK
667 else {
668 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
669 return -EINVAL;
670
2766f182
MK
671 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
672 index >= nr_pages)
0d835962
MK
673 return -EINVAL;
674
675 nr_pages = 1;
676 mode = PAGE_WRITEBACK;
677 }
a939888e
MK
678
679 down_read(&zram->init_lock);
680 if (!init_done(zram)) {
681 ret = -EINVAL;
682 goto release_init_lock;
683 }
684
685 if (!zram->backing_dev) {
686 ret = -ENODEV;
687 goto release_init_lock;
688 }
689
690 page = alloc_page(GFP_KERNEL);
691 if (!page) {
692 ret = -ENOMEM;
693 goto release_init_lock;
694 }
695
2766f182 696 for (; nr_pages != 0; index++, nr_pages--) {
a939888e
MK
697 struct bio_vec bvec;
698
699 bvec.bv_page = page;
700 bvec.bv_len = PAGE_SIZE;
701 bvec.bv_offset = 0;
702
1d69a3f8
MK
703 spin_lock(&zram->wb_limit_lock);
704 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
705 spin_unlock(&zram->wb_limit_lock);
bb416d18
MK
706 ret = -EIO;
707 break;
708 }
1d69a3f8 709 spin_unlock(&zram->wb_limit_lock);
bb416d18 710
a939888e
MK
711 if (!blk_idx) {
712 blk_idx = alloc_block_bdev(zram);
713 if (!blk_idx) {
714 ret = -ENOSPC;
715 break;
716 }
717 }
718
719 zram_slot_lock(zram, index);
720 if (!zram_allocated(zram, index))
721 goto next;
722
723 if (zram_test_flag(zram, index, ZRAM_WB) ||
724 zram_test_flag(zram, index, ZRAM_SAME) ||
725 zram_test_flag(zram, index, ZRAM_UNDER_WB))
726 goto next;
727
30226b69 728 if (mode & IDLE_WRITEBACK &&
1d69a3f8
MK
729 !zram_test_flag(zram, index, ZRAM_IDLE))
730 goto next;
30226b69 731 if (mode & HUGE_WRITEBACK &&
1d69a3f8 732 !zram_test_flag(zram, index, ZRAM_HUGE))
a939888e
MK
733 goto next;
734 /*
735 * Clearing ZRAM_UNDER_WB is the duty of the caller.
736 * IOW, zram_free_page never clears it.
737 */
738 zram_set_flag(zram, index, ZRAM_UNDER_WB);
739 /* Need for hugepage writeback racing */
740 zram_set_flag(zram, index, ZRAM_IDLE);
741 zram_slot_unlock(zram, index);
742 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
743 zram_slot_lock(zram, index);
744 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
745 zram_clear_flag(zram, index, ZRAM_IDLE);
746 zram_slot_unlock(zram, index);
747 continue;
748 }
749
49add496
CH
750 bio_init(&bio, zram->bdev, &bio_vec, 1,
751 REQ_OP_WRITE | REQ_SYNC);
a939888e 752 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
a939888e
MK
753
754 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
755 bvec.bv_offset);
756 /*
757 * XXX: A single page IO would be inefficient for write
758 * but it is not bad as a starter.
759 */
57e0076e
MK
760 err = submit_bio_wait(&bio);
761 if (err) {
a939888e
MK
762 zram_slot_lock(zram, index);
763 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
764 zram_clear_flag(zram, index, ZRAM_IDLE);
765 zram_slot_unlock(zram, index);
766 /*
767 * Return the last IO error to the caller if
768 * any of the writeback IOs failed.
769 */
770 ret = err;
a939888e
MK
771 continue;
772 }
773
23eddf39 774 atomic64_inc(&zram->stats.bd_writes);
a939888e
MK
775 /*
776 * We released zram_slot_lock so need to check if the slot was
777 * changed. If there is freeing for the slot, we can catch it
778 * easily by zram_allocated.
779 * A subtle case is the slot is freed/reallocated/marked as
780 * ZRAM_IDLE again. To close the race, idle_store doesn't
781 * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
782 * Thus, we could close the race by checking ZRAM_IDLE bit.
783 */
784 zram_slot_lock(zram, index);
785 if (!zram_allocated(zram, index) ||
786 !zram_test_flag(zram, index, ZRAM_IDLE)) {
787 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
788 zram_clear_flag(zram, index, ZRAM_IDLE);
789 goto next;
790 }
791
792 zram_free_page(zram, index);
793 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
794 zram_set_flag(zram, index, ZRAM_WB);
795 zram_set_element(zram, index, blk_idx);
796 blk_idx = 0;
797 atomic64_inc(&zram->stats.pages_stored);
1d69a3f8
MK
798 spin_lock(&zram->wb_limit_lock);
799 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
800 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
801 spin_unlock(&zram->wb_limit_lock);
a939888e
MK
802next:
803 zram_slot_unlock(zram, index);
804 }
805
806 if (blk_idx)
807 free_block_bdev(zram, blk_idx);
808 __free_page(page);
809release_init_lock:
810 up_read(&zram->init_lock);
811
812 return ret;
813}
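/*
 * Usage sketch for the writeback attribute (assumes zram0 with a
 * backing_dev already configured; the page_index value is only an
 * illustration):
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo huge > /sys/block/zram0/writeback
 *	echo huge_idle > /sys/block/zram0/writeback
 *	echo "page_index=1251" > /sys/block/zram0/writeback
 */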
814
8e654f8f
MK
815struct zram_work {
816 struct work_struct work;
817 struct zram *zram;
818 unsigned long entry;
819 struct bio *bio;
e153abc0 820 struct bio_vec bvec;
8e654f8f
MK
821};
822
823#if PAGE_SIZE != 4096
824static void zram_sync_read(struct work_struct *work)
825{
8e654f8f
MK
826 struct zram_work *zw = container_of(work, struct zram_work, work);
827 struct zram *zram = zw->zram;
828 unsigned long entry = zw->entry;
829 struct bio *bio = zw->bio;
830
e153abc0 831 read_from_bdev_async(zram, &zw->bvec, entry, bio);
8e654f8f
MK
832}
833
834/*
835 * The block layer wants only one ->submit_bio to be active at a time, so
836 * chaining this IO to a parent IO in the same context would deadlock. To
837 * avoid that, use a worker thread context.
838 */
839static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
840 unsigned long entry, struct bio *bio)
841{
842 struct zram_work work;
843
e153abc0 844 work.bvec = *bvec;
8e654f8f
MK
845 work.zram = zram;
846 work.entry = entry;
847 work.bio = bio;
848
849 INIT_WORK_ONSTACK(&work.work, zram_sync_read);
850 queue_work(system_unbound_wq, &work.work);
851 flush_work(&work.work);
852 destroy_work_on_stack(&work.work);
853
854 return 1;
855}
856#else
857static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
858 unsigned long entry, struct bio *bio)
859{
860 WARN_ON(1);
861 return -EIO;
862}
863#endif
864
865static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
866 unsigned long entry, struct bio *parent, bool sync)
867{
23eddf39 868 atomic64_inc(&zram->stats.bd_reads);
8e654f8f
MK
869 if (sync)
870 return read_from_bdev_sync(zram, bvec, entry, parent);
871 else
872 return read_from_bdev_async(zram, bvec, entry, parent);
873}
013bf95a 874#else
013bf95a 875static inline void reset_bdev(struct zram *zram) {};
8e654f8f
MK
876static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
877 unsigned long entry, struct bio *parent, bool sync)
878{
879 return -EIO;
880}
7e529283
MK
881
882static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
013bf95a
MK
883#endif
884
c0265342
MK
885#ifdef CONFIG_ZRAM_MEMORY_TRACKING
886
887static struct dentry *zram_debugfs_root;
888
889static void zram_debugfs_create(void)
890{
891 zram_debugfs_root = debugfs_create_dir("zram", NULL);
892}
893
894static void zram_debugfs_destroy(void)
895{
896 debugfs_remove_recursive(zram_debugfs_root);
897}
898
899static void zram_accessed(struct zram *zram, u32 index)
900{
e82592c4 901 zram_clear_flag(zram, index, ZRAM_IDLE);
c0265342
MK
902 zram->table[index].ac_time = ktime_get_boottime();
903}
904
c0265342
MK
905static ssize_t read_block_state(struct file *file, char __user *buf,
906 size_t count, loff_t *ppos)
907{
908 char *kbuf;
909 ssize_t index, written = 0;
910 struct zram *zram = file->private_data;
911 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
912 struct timespec64 ts;
913
914 kbuf = kvmalloc(count, GFP_KERNEL);
915 if (!kbuf)
916 return -ENOMEM;
917
918 down_read(&zram->init_lock);
919 if (!init_done(zram)) {
920 up_read(&zram->init_lock);
921 kvfree(kbuf);
922 return -EINVAL;
923 }
924
925 for (index = *ppos; index < nr_pages; index++) {
926 int copied;
927
928 zram_slot_lock(zram, index);
929 if (!zram_allocated(zram, index))
930 goto next;
931
932 ts = ktime_to_timespec64(zram->table[index].ac_time);
933 copied = snprintf(kbuf + written, count,
e82592c4 934 "%12zd %12lld.%06lu %c%c%c%c\n",
c0265342
MK
935 index, (s64)ts.tv_sec,
936 ts.tv_nsec / NSEC_PER_USEC,
937 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
938 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
e82592c4
MK
939 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
940 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
c0265342 941
a88e03cf 942 if (count <= copied) {
c0265342
MK
943 zram_slot_unlock(zram, index);
944 break;
945 }
946 written += copied;
947 count -= copied;
948next:
949 zram_slot_unlock(zram, index);
950 *ppos += 1;
951 }
952
953 up_read(&zram->init_lock);
954 if (copy_to_user(buf, kbuf, written))
955 written = -EFAULT;
956 kvfree(kbuf);
957
958 return written;
959}
960
961static const struct file_operations proc_zram_block_state_op = {
962 .open = simple_open,
963 .read = read_block_state,
964 .llseek = default_llseek,
965};
966
967static void zram_debugfs_register(struct zram *zram)
968{
969 if (!zram_debugfs_root)
970 return;
971
972 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
973 zram_debugfs_root);
974 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
975 zram, &proc_zram_block_state_op);
976}
977
978static void zram_debugfs_unregister(struct zram *zram)
979{
980 debugfs_remove_recursive(zram->debugfs_dir);
981}
982#else
983static void zram_debugfs_create(void) {};
984static void zram_debugfs_destroy(void) {};
e82592c4
MK
985static void zram_accessed(struct zram *zram, u32 index)
986{
987 zram_clear_flag(zram, index, ZRAM_IDLE);
988};
c0265342
MK
989static void zram_debugfs_register(struct zram *zram) {};
990static void zram_debugfs_unregister(struct zram *zram) {};
991#endif
013bf95a 992
43209ea2
SS
993/*
994 * We switched to per-cpu streams and this attr is not needed anymore.
995 * However, we will keep it around for some time, because:
996 * a) we may revert per-cpu streams in the future
997 * b) it's visible to user space and we need to follow our 2 years
998 * retirement rule; but we already have a number of 'soon to be
999 * altered' attrs, so max_comp_streams needs to wait for the next
1000 * layoff cycle.
1001 */
522698d7
SS
1002static ssize_t max_comp_streams_show(struct device *dev,
1003 struct device_attribute *attr, char *buf)
1004{
43209ea2 1005 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
522698d7
SS
1006}
1007
beca3ec7
SS
1008static ssize_t max_comp_streams_store(struct device *dev,
1009 struct device_attribute *attr, const char *buf, size_t len)
1010{
43209ea2 1011 return len;
beca3ec7
SS
1012}
1013
e46b8a03
SS
1014static ssize_t comp_algorithm_show(struct device *dev,
1015 struct device_attribute *attr, char *buf)
1016{
1017 size_t sz;
1018 struct zram *zram = dev_to_zram(dev);
1019
1020 down_read(&zram->init_lock);
1021 sz = zcomp_available_show(zram->compressor, buf);
1022 up_read(&zram->init_lock);
1023
1024 return sz;
1025}
1026
1027static ssize_t comp_algorithm_store(struct device *dev,
1028 struct device_attribute *attr, const char *buf, size_t len)
1029{
1030 struct zram *zram = dev_to_zram(dev);
f357e345 1031 char compressor[ARRAY_SIZE(zram->compressor)];
4bbacd51
SS
1032 size_t sz;
1033
415403be
SS
1034 strlcpy(compressor, buf, sizeof(compressor));
1035 /* ignore trailing newline */
1036 sz = strlen(compressor);
1037 if (sz > 0 && compressor[sz - 1] == '\n')
1038 compressor[sz - 1] = 0x00;
1039
1040 if (!zcomp_available_algorithm(compressor))
1d5b43bf
LH
1041 return -EINVAL;
1042
e46b8a03
SS
1043 down_write(&zram->init_lock);
1044 if (init_done(zram)) {
1045 up_write(&zram->init_lock);
1046 pr_info("Can't change algorithm for initialized device\n");
1047 return -EBUSY;
1048 }
4bbacd51 1049
f357e345 1050 strcpy(zram->compressor, compressor);
1051 up_write(&zram->init_lock);
1052 return len;
1053}
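/*
 * Usage sketch for the comp_algorithm attribute (assumes zram0 and that
 * the named compressors are compiled in; the shown output is only an
 * example):
 *
 *	cat /sys/block/zram0/comp_algorithm	# e.g. lzo lzo-rle [lz4] zstd
 *	echo zstd > /sys/block/zram0/comp_algorithm
 *
 * The algorithm can only be changed before disksize is set; afterwards
 * the store returns -EBUSY.
 */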
1054
522698d7
SS
1055static ssize_t compact_store(struct device *dev,
1056 struct device_attribute *attr, const char *buf, size_t len)
306b0c95 1057{
522698d7 1058 struct zram *zram = dev_to_zram(dev);
306b0c95 1059
522698d7
SS
1060 down_read(&zram->init_lock);
1061 if (!init_done(zram)) {
1062 up_read(&zram->init_lock);
1063 return -EINVAL;
1064 }
306b0c95 1065
beb6602c 1066 zs_compact(zram->mem_pool);
522698d7 1067 up_read(&zram->init_lock);
d2d5e762 1068
522698d7 1069 return len;
d2d5e762
WY
1070}
1071
522698d7
SS
1072static ssize_t io_stat_show(struct device *dev,
1073 struct device_attribute *attr, char *buf)
d2d5e762 1074{
522698d7
SS
1075 struct zram *zram = dev_to_zram(dev);
1076 ssize_t ret;
d2d5e762 1077
522698d7
SS
1078 down_read(&zram->init_lock);
1079 ret = scnprintf(buf, PAGE_SIZE,
1080 "%8llu %8llu %8llu %8llu\n",
1081 (u64)atomic64_read(&zram->stats.failed_reads),
1082 (u64)atomic64_read(&zram->stats.failed_writes),
1083 (u64)atomic64_read(&zram->stats.invalid_io),
1084 (u64)atomic64_read(&zram->stats.notify_free));
1085 up_read(&zram->init_lock);
306b0c95 1086
522698d7 1087 return ret;
9b3bb7ab
SS
1088}
1089
522698d7
SS
1090static ssize_t mm_stat_show(struct device *dev,
1091 struct device_attribute *attr, char *buf)
9b3bb7ab 1092{
522698d7 1093 struct zram *zram = dev_to_zram(dev);
7d3f3938 1094 struct zs_pool_stats pool_stats;
522698d7
SS
1095 u64 orig_size, mem_used = 0;
1096 long max_used;
1097 ssize_t ret;
a539c72a 1098
7d3f3938
SS
1099 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1100
522698d7 1101 down_read(&zram->init_lock);
7d3f3938 1102 if (init_done(zram)) {
beb6602c
MK
1103 mem_used = zs_get_total_pages(zram->mem_pool);
1104 zs_pool_stats(zram->mem_pool, &pool_stats);
7d3f3938 1105 }
9b3bb7ab 1106
522698d7
SS
1107 orig_size = atomic64_read(&zram->stats.pages_stored);
1108 max_used = atomic_long_read(&zram->stats.max_used_pages);
9b3bb7ab 1109
522698d7 1110 ret = scnprintf(buf, PAGE_SIZE,
194e28da 1111 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
522698d7
SS
1112 orig_size << PAGE_SHIFT,
1113 (u64)atomic64_read(&zram->stats.compr_data_size),
1114 mem_used << PAGE_SHIFT,
1115 zram->limit_pages << PAGE_SHIFT,
1116 max_used << PAGE_SHIFT,
8e19d540 1117 (u64)atomic64_read(&zram->stats.same_pages),
23959281 1118 atomic_long_read(&pool_stats.pages_compacted),
194e28da
MK
1119 (u64)atomic64_read(&zram->stats.huge_pages),
1120 (u64)atomic64_read(&zram->stats.huge_pages_since));
522698d7 1121 up_read(&zram->init_lock);
9b3bb7ab 1122
1123 return ret;
1124}
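/*
 * The mm_stat columns above are, in order: orig_data_size,
 * compr_data_size, mem_used_total, mem_limit, mem_used_max, same_pages,
 * pages_compacted, huge_pages and huge_pages_since. The first five
 * columns are byte values (page counts are shifted by PAGE_SHIFT before
 * printing); the last four are plain counts.
 */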
1125
23eddf39 1126#ifdef CONFIG_ZRAM_WRITEBACK
bb416d18 1127#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
23eddf39
MK
1128static ssize_t bd_stat_show(struct device *dev,
1129 struct device_attribute *attr, char *buf)
1130{
1131 struct zram *zram = dev_to_zram(dev);
1132 ssize_t ret;
1133
1134 down_read(&zram->init_lock);
1135 ret = scnprintf(buf, PAGE_SIZE,
1136 "%8llu %8llu %8llu\n",
bb416d18
MK
1137 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1138 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1139 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
23eddf39
MK
1140 up_read(&zram->init_lock);
1141
1142 return ret;
1143}
1144#endif
1145
623e47fc
SS
1146static ssize_t debug_stat_show(struct device *dev,
1147 struct device_attribute *attr, char *buf)
1148{
37887783 1149 int version = 1;
623e47fc
SS
1150 struct zram *zram = dev_to_zram(dev);
1151 ssize_t ret;
1152
1153 down_read(&zram->init_lock);
1154 ret = scnprintf(buf, PAGE_SIZE,
37887783 1155 "version: %d\n%8llu %8llu\n",
623e47fc 1156 version,
37887783 1157 (u64)atomic64_read(&zram->stats.writestall),
3c9959e0 1158 (u64)atomic64_read(&zram->stats.miss_free));
623e47fc
SS
1159 up_read(&zram->init_lock);
1160
1161 return ret;
1162}
1163
522698d7
SS
1164static DEVICE_ATTR_RO(io_stat);
1165static DEVICE_ATTR_RO(mm_stat);
23eddf39
MK
1166#ifdef CONFIG_ZRAM_WRITEBACK
1167static DEVICE_ATTR_RO(bd_stat);
1168#endif
623e47fc 1169static DEVICE_ATTR_RO(debug_stat);
522698d7 1170
beb6602c 1171static void zram_meta_free(struct zram *zram, u64 disksize)
522698d7
SS
1172{
1173 size_t num_pages = disksize >> PAGE_SHIFT;
1174 size_t index;
1fec1172
GM
1175
1176 /* Free all pages that are still in this zram device */
302128dc
MK
1177 for (index = 0; index < num_pages; index++)
1178 zram_free_page(zram, index);
1fec1172 1179
beb6602c
MK
1180 zs_destroy_pool(zram->mem_pool);
1181 vfree(zram->table);
9b3bb7ab
SS
1182}
1183
beb6602c 1184static bool zram_meta_alloc(struct zram *zram, u64 disksize)
9b3bb7ab
SS
1185{
1186 size_t num_pages;
9b3bb7ab 1187
9b3bb7ab 1188 num_pages = disksize >> PAGE_SHIFT;
fad953ce 1189 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
beb6602c
MK
1190 if (!zram->table)
1191 return false;
9b3bb7ab 1192
beb6602c
MK
1193 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1194 if (!zram->mem_pool) {
1195 vfree(zram->table);
1196 return false;
9b3bb7ab
SS
1197 }
1198
60f5921a
SS
1199 if (!huge_class_size)
1200 huge_class_size = zs_huge_class_size(zram->mem_pool);
beb6602c 1201 return true;
9b3bb7ab
SS
1202}
1203
d2d5e762
WY
1204/*
1205 * To protect concurrent access to the same index entry,
1206 * caller should hold this table index entry's bit_spinlock to
1207 * indicate this index entry is accessing.
1208 */
f1e3cfff 1209static void zram_free_page(struct zram *zram, size_t index)
306b0c95 1210{
db8ffbd4
MK
1211 unsigned long handle;
1212
7e529283
MK
1213#ifdef CONFIG_ZRAM_MEMORY_TRACKING
1214 zram->table[index].ac_time = 0;
1215#endif
e82592c4
MK
1216 if (zram_test_flag(zram, index, ZRAM_IDLE))
1217 zram_clear_flag(zram, index, ZRAM_IDLE);
1218
89e85bce
MK
1219 if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1220 zram_clear_flag(zram, index, ZRAM_HUGE);
1221 atomic64_dec(&zram->stats.huge_pages);
1222 }
1223
7e529283
MK
1224 if (zram_test_flag(zram, index, ZRAM_WB)) {
1225 zram_clear_flag(zram, index, ZRAM_WB);
1226 free_block_bdev(zram, zram_get_element(zram, index));
1227 goto out;
db8ffbd4 1228 }
306b0c95 1229
8e19d540 1230 /*
1231 * No memory is allocated for same element filled pages.
1232 * Simply clear same page flag.
1233 */
beb6602c
MK
1234 if (zram_test_flag(zram, index, ZRAM_SAME)) {
1235 zram_clear_flag(zram, index, ZRAM_SAME);
8e19d540 1236 atomic64_dec(&zram->stats.same_pages);
7e529283 1237 goto out;
306b0c95
NG
1238 }
1239
db8ffbd4 1240 handle = zram_get_handle(zram, index);
8e19d540 1241 if (!handle)
1242 return;
1243
beb6602c 1244 zs_free(zram->mem_pool, handle);
306b0c95 1245
beb6602c 1246 atomic64_sub(zram_get_obj_size(zram, index),
d2d5e762 1247 &zram->stats.compr_data_size);
7e529283 1248out:
90a7806e 1249 atomic64_dec(&zram->stats.pages_stored);
643ae61d 1250 zram_set_handle(zram, index, 0);
beb6602c 1251 zram_set_obj_size(zram, index, 0);
a939888e
MK
1252 WARN_ON_ONCE(zram->table[index].flags &
1253 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
306b0c95
NG
1254}
1255
8e654f8f
MK
1256static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1257 struct bio *bio, bool partial_io)
306b0c95 1258{
0669d2b2 1259 struct zcomp_strm *zstrm;
92967471 1260 unsigned long handle;
ebaf9ab5 1261 unsigned int size;
1f7319c7 1262 void *src, *dst;
0669d2b2 1263 int ret;
1f7319c7 1264
7e529283
MK
1265 zram_slot_lock(zram, index);
1266 if (zram_test_flag(zram, index, ZRAM_WB)) {
1267 struct bio_vec bvec;
8e654f8f 1268
8e654f8f 1269 zram_slot_unlock(zram, index);
7e529283
MK
1270
1271 bvec.bv_page = page;
1272 bvec.bv_len = PAGE_SIZE;
1273 bvec.bv_offset = 0;
1274 return read_from_bdev(zram, &bvec,
1275 zram_get_element(zram, index),
1276 bio, partial_io);
8e654f8f
MK
1277 }
1278
643ae61d 1279 handle = zram_get_handle(zram, index);
ae94264e
MK
1280 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1281 unsigned long value;
1282 void *mem;
1283
1284 value = handle ? zram_get_element(zram, index) : 0;
1285 mem = kmap_atomic(page);
1286 zram_fill_page(mem, PAGE_SIZE, value);
1287 kunmap_atomic(mem);
1288 zram_slot_unlock(zram, index);
1289 return 0;
1290 }
1291
beb6602c 1292 size = zram_get_obj_size(zram, index);
306b0c95 1293
0669d2b2
PZ
1294 if (size != PAGE_SIZE)
1295 zstrm = zcomp_stream_get(zram->comp);
1296
beb6602c 1297 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
ebaf9ab5 1298 if (size == PAGE_SIZE) {
1f7319c7
MK
1299 dst = kmap_atomic(page);
1300 memcpy(dst, src, PAGE_SIZE);
1301 kunmap_atomic(dst);
1302 ret = 0;
ebaf9ab5 1303 } else {
1f7319c7
MK
1304 dst = kmap_atomic(page);
1305 ret = zcomp_decompress(zstrm, src, size, dst);
1306 kunmap_atomic(dst);
ebaf9ab5
SS
1307 zcomp_stream_put(zram->comp);
1308 }
beb6602c 1309 zs_unmap_object(zram->mem_pool, handle);
86c49814 1310 zram_slot_unlock(zram, index);
a1dd52af 1311
8c921b2b 1312 /* Should NEVER happen. Return bio error if it does. */
4e79603b 1313 if (WARN_ON(ret))
8c921b2b 1314 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
306b0c95 1315
1f7319c7 1316 return ret;
306b0c95
NG
1317}
1318
37b51fdd 1319static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
8e654f8f 1320 u32 index, int offset, struct bio *bio)
924bd88d
JM
1321{
1322 int ret;
37b51fdd 1323 struct page *page;
37b51fdd 1324
1f7319c7
MK
1325 page = bvec->bv_page;
1326 if (is_partial_io(bvec)) {
1327 /* Use a temporary buffer to decompress the page */
1328 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1329 if (!page)
1330 return -ENOMEM;
924bd88d
JM
1331 }
1332
8e654f8f 1333 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1f7319c7
MK
1334 if (unlikely(ret))
1335 goto out;
7e5a5104 1336
1f7319c7 1337 if (is_partial_io(bvec)) {
1f7319c7 1338 void *src = kmap_atomic(page);
37b51fdd 1339
b3bd0a8a 1340 memcpy_to_bvec(bvec, src + offset);
1f7319c7 1341 kunmap_atomic(src);
37b51fdd 1342 }
1f7319c7 1343out:
37b51fdd 1344 if (is_partial_io(bvec))
1f7319c7 1345 __free_page(page);
37b51fdd 1346
37b51fdd 1347 return ret;
924bd88d
JM
1348}
1349
db8ffbd4
MK
1350static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1351 u32 index, struct bio *bio)
306b0c95 1352{
ae85a807 1353 int ret = 0;
1f7319c7 1354 unsigned long alloced_pages;
37887783 1355 unsigned long handle = -ENOMEM;
97ec7c8b
MK
1356 unsigned int comp_len = 0;
1357 void *src, *dst, *mem;
1358 struct zcomp_strm *zstrm;
1359 struct page *page = bvec->bv_page;
1360 unsigned long element = 0;
1361 enum zram_pageflags flags = 0;
1362
1363 mem = kmap_atomic(page);
1364 if (page_same_filled(mem, &element)) {
1365 kunmap_atomic(mem);
1366 /* Free memory associated with this sector now. */
1367 flags = ZRAM_SAME;
1368 atomic64_inc(&zram->stats.same_pages);
1369 goto out;
1370 }
1371 kunmap_atomic(mem);
924bd88d 1372
37887783 1373compress_again:
97ec7c8b 1374 zstrm = zcomp_stream_get(zram->comp);
1f7319c7 1375 src = kmap_atomic(page);
97ec7c8b 1376 ret = zcomp_compress(zstrm, src, &comp_len);
1f7319c7 1377 kunmap_atomic(src);
306b0c95 1378
b7ca232e 1379 if (unlikely(ret)) {
97ec7c8b 1380 zcomp_stream_put(zram->comp);
8c921b2b 1381 pr_err("Compression failed! err=%d\n", ret);
37887783 1382 zs_free(zram->mem_pool, handle);
1f7319c7 1383 return ret;
8c921b2b 1384 }
da9556a2 1385
a939888e 1386 if (comp_len >= huge_class_size)
89e85bce 1387 comp_len = PAGE_SIZE;
37887783
JS
1388 /*
1389 * handle allocation has 2 paths:
1390 * a) fast path is executed with preemption disabled (for
1391 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1392 * since we can't sleep;
1393 * b) slow path enables preemption and attempts to allocate
1394 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
1395 * put per-cpu compression stream and, thus, to re-do
1396 * the compression once handle is allocated.
1397 *
1398 * if we have a 'non-null' handle here then we are coming
1399 * from the slow path and handle has already been allocated.
1400 */
1401 if (IS_ERR((void *)handle))
1402 handle = zs_malloc(zram->mem_pool, comp_len,
1403 __GFP_KSWAPD_RECLAIM |
1404 __GFP_NOWARN |
1405 __GFP_HIGHMEM |
1406 __GFP_MOVABLE);
c7e6f17b 1407 if (IS_ERR((void *)handle)) {
2aea8493 1408 zcomp_stream_put(zram->comp);
37887783
JS
1409 atomic64_inc(&zram->stats.writestall);
1410 handle = zs_malloc(zram->mem_pool, comp_len,
1411 GFP_NOIO | __GFP_HIGHMEM |
1412 __GFP_MOVABLE);
641608f3
AR
1413 if (IS_ERR((void *)handle))
1414 return PTR_ERR((void *)handle);
1415
1416 if (comp_len != PAGE_SIZE)
37887783 1417 goto compress_again;
641608f3
AR
1418 /*
1419 * If the page is not compressible, you need to acquire the lock and
1420 * execute the code below. The zcomp_stream_get() call is needed to
1421 * disable the cpu hotplug and grab the zstrm buffer back.
1422 * It is necessary that the dereferencing of the zstrm variable below
1423 * occurs correctly.
1424 */
1425 zstrm = zcomp_stream_get(zram->comp);
8c921b2b 1426 }
9ada9da9 1427
beb6602c 1428 alloced_pages = zs_get_total_pages(zram->mem_pool);
12372755
SS
1429 update_used_max(zram, alloced_pages);
1430
461a8eee 1431 if (zram->limit_pages && alloced_pages > zram->limit_pages) {
97ec7c8b 1432 zcomp_stream_put(zram->comp);
beb6602c 1433 zs_free(zram->mem_pool, handle);
1f7319c7
MK
1434 return -ENOMEM;
1435 }
1436
beb6602c 1437 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1f7319c7
MK
1438
1439 src = zstrm->buffer;
1440 if (comp_len == PAGE_SIZE)
397c6066 1441 src = kmap_atomic(page);
1f7319c7
MK
1442 memcpy(dst, src, comp_len);
1443 if (comp_len == PAGE_SIZE)
397c6066 1444 kunmap_atomic(src);
306b0c95 1445
2aea8493 1446 zcomp_stream_put(zram->comp);
beb6602c 1447 zs_unmap_object(zram->mem_pool, handle);
4ebbe7f7
MK
1448 atomic64_add(comp_len, &zram->stats.compr_data_size);
1449out:
f40ac2ae
SS
1450 /*
1451 * Free memory associated with this sector
1452 * before overwriting unused sectors.
1453 */
86c49814 1454 zram_slot_lock(zram, index);
f40ac2ae 1455 zram_free_page(zram, index);
db8ffbd4 1456
89e85bce
MK
1457 if (comp_len == PAGE_SIZE) {
1458 zram_set_flag(zram, index, ZRAM_HUGE);
1459 atomic64_inc(&zram->stats.huge_pages);
194e28da 1460 atomic64_inc(&zram->stats.huge_pages_since);
89e85bce
MK
1461 }
1462
db8ffbd4
MK
1463 if (flags) {
1464 zram_set_flag(zram, index, flags);
4ebbe7f7 1465 zram_set_element(zram, index, element);
db8ffbd4 1466 } else {
4ebbe7f7
MK
1467 zram_set_handle(zram, index, handle);
1468 zram_set_obj_size(zram, index, comp_len);
1469 }
86c49814 1470 zram_slot_unlock(zram, index);
306b0c95 1471
8c921b2b 1472 /* Update stats */
90a7806e 1473 atomic64_inc(&zram->stats.pages_stored);
ae85a807 1474 return ret;
1f7319c7
MK
1475}
1476
1477static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
db8ffbd4 1478 u32 index, int offset, struct bio *bio)
1f7319c7
MK
1479{
1480 int ret;
1481 struct page *page = NULL;
1f7319c7
MK
1482 struct bio_vec vec;
1483
1484 vec = *bvec;
1485 if (is_partial_io(bvec)) {
1486 void *dst;
1487 /*
1488 * This is a partial IO. We need to read the full page
1489 * before to write the changes.
1490 */
1491 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1492 if (!page)
1493 return -ENOMEM;
1494
8e654f8f 1495 ret = __zram_bvec_read(zram, page, index, bio, true);
1f7319c7
MK
1496 if (ret)
1497 goto out;
1498
1f7319c7 1499 dst = kmap_atomic(page);
bd3d3203 1500 memcpy_from_bvec(dst + offset, bvec);
1f7319c7 1501 kunmap_atomic(dst);
1f7319c7
MK
1502
1503 vec.bv_page = page;
1504 vec.bv_len = PAGE_SIZE;
1505 vec.bv_offset = 0;
1506 }
1507
db8ffbd4 1508 ret = __zram_bvec_write(zram, &vec, index, bio);
924bd88d 1509out:
397c6066 1510 if (is_partial_io(bvec))
1f7319c7 1511 __free_page(page);
924bd88d 1512 return ret;
8c921b2b
JM
1513}
1514
f4659d8e
JK
1515/*
1516 * zram_bio_discard - handler for discard requests
1517 * @index: physical block index in PAGE_SIZE units
1518 * @offset: byte offset within physical block
1519 */
1520static void zram_bio_discard(struct zram *zram, u32 index,
1521 int offset, struct bio *bio)
1522{
1523 size_t n = bio->bi_iter.bi_size;
1524
1525 /*
1526 * zram manages data in physical block size units. Because logical block
1527 * size isn't identical to the physical block size on some architectures, we
1528 * could get a discard request pointing to a specific offset within a
1529 * certain physical block. Although we can handle this request by
1530 * reading that physical block, decompressing it, partially zeroing it,
1531 * re-compressing it and then re-storing it, this isn't reasonable
1532 * because our intent with a discard request is to save memory. So
1533 * skipping this logical block is appropriate here.
1534 */
1535 if (offset) {
38515c73 1536 if (n <= (PAGE_SIZE - offset))
f4659d8e
JK
1537 return;
1538
38515c73 1539 n -= (PAGE_SIZE - offset);
f4659d8e
JK
1540 index++;
1541 }
1542
1543 while (n >= PAGE_SIZE) {
86c49814 1544 zram_slot_lock(zram, index);
f4659d8e 1545 zram_free_page(zram, index);
86c49814 1546 zram_slot_unlock(zram, index);
015254da 1547 atomic64_inc(&zram->stats.notify_free);
f4659d8e
JK
1548 index++;
1549 n -= PAGE_SIZE;
1550 }
1551}
1552
1553 /*
1554 * Returns a negative errno on failure. Otherwise returns 0 or 1:
1555 * 0 if the IO request was completed synchronously,
1556 * 1 if the IO request was successfully submitted (asynchronously).
1557 */
522698d7 1558static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
bc0421ea 1559 int offset, enum req_op op, struct bio *bio)
9b3bb7ab
SS
1560{
1561 int ret;
9b3bb7ab 1562
3f289dcb 1563 if (!op_is_write(op)) {
522698d7 1564 atomic64_inc(&zram->stats.num_reads);
8e654f8f 1565 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1f7319c7 1566 flush_dcache_page(bvec->bv_page);
522698d7
SS
1567 } else {
1568 atomic64_inc(&zram->stats.num_writes);
db8ffbd4 1569 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1b672224 1570 }
9b3bb7ab 1571
d7eac6b6
MK
1572 zram_slot_lock(zram, index);
1573 zram_accessed(zram, index);
1574 zram_slot_unlock(zram, index);
1575
ae85a807 1576 if (unlikely(ret < 0)) {
3f289dcb 1577 if (!op_is_write(op))
522698d7
SS
1578 atomic64_inc(&zram->stats.failed_reads);
1579 else
1580 atomic64_inc(&zram->stats.failed_writes);
1b672224 1581 }
9b3bb7ab 1582
1b672224 1583 return ret;
8c921b2b
JM
1584}
1585
be257c61 1586static void __zram_make_request(struct zram *zram, struct bio *bio)
8c921b2b 1587{
abf54548 1588 int offset;
8c921b2b 1589 u32 index;
7988613b
KO
1590 struct bio_vec bvec;
1591 struct bvec_iter iter;
d7614e44 1592 unsigned long start_time;
8c921b2b 1593
4f024f37
KO
1594 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1595 offset = (bio->bi_iter.bi_sector &
1596 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
8c921b2b 1597
31edeacd
CH
1598 switch (bio_op(bio)) {
1599 case REQ_OP_DISCARD:
1600 case REQ_OP_WRITE_ZEROES:
f4659d8e 1601 zram_bio_discard(zram, index, offset, bio);
4246a0b6 1602 bio_endio(bio);
f4659d8e 1603 return;
31edeacd
CH
1604 default:
1605 break;
f4659d8e
JK
1606 }
1607
d7614e44 1608 start_time = bio_start_io_acct(bio);
7988613b 1609 bio_for_each_segment(bvec, bio, iter) {
e86942c7
MK
1610 struct bio_vec bv = bvec;
1611 unsigned int unwritten = bvec.bv_len;
924bd88d 1612
e86942c7
MK
1613 do {
1614 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1615 unwritten);
abf54548 1616 if (zram_bvec_rw(zram, &bv, index, offset,
d7614e44
CH
1617 bio_op(bio), bio) < 0) {
1618 bio->bi_status = BLK_STS_IOERR;
1619 break;
1620 }
924bd88d 1621
e86942c7
MK
1622 bv.bv_offset += bv.bv_len;
1623 unwritten -= bv.bv_len;
924bd88d 1624
e86942c7
MK
1625 update_position(&index, &offset, &bv);
1626 } while (unwritten);
a1dd52af 1627 }
d7614e44 1628 bio_end_io_acct(bio, start_time);
4246a0b6 1629 bio_endio(bio);
306b0c95
NG
1630}
1631
306b0c95 1632/*
f1e3cfff 1633 * Handler function for all zram I/O requests.
306b0c95 1634 */
3e08773c 1635static void zram_submit_bio(struct bio *bio)
306b0c95 1636{
309dca30 1637 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
306b0c95 1638
54850e73 1639 if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1640 bio->bi_iter.bi_size)) {
da5cc7d3 1641 atomic64_inc(&zram->stats.invalid_io);
3e08773c
CH
1642 bio_io_error(bio);
1643 return;
6642a67c
JM
1644 }
1645
be257c61 1646 __zram_make_request(zram, bio);
306b0c95
NG
1647}
1648
2ccbec05
NG
1649static void zram_slot_free_notify(struct block_device *bdev,
1650 unsigned long index)
107c161b 1651{
f1e3cfff 1652 struct zram *zram;
107c161b 1653
f1e3cfff 1654 zram = bdev->bd_disk->private_data;
a0c516cb 1655
3c9959e0
MK
1656 atomic64_inc(&zram->stats.notify_free);
1657 if (!zram_slot_trylock(zram, index)) {
1658 atomic64_inc(&zram->stats.miss_free);
1659 return;
1660 }
1661
f614a9f4 1662 zram_free_page(zram, index);
86c49814 1663 zram_slot_unlock(zram, index);
107c161b
NG
1664}
1665
8c7f0102 1666static int zram_rw_page(struct block_device *bdev, sector_t sector,
86947df3 1667 struct page *page, enum req_op op)
8c7f0102 1668{
ae85a807 1669 int offset, ret;
8c7f0102 1670 u32 index;
1671 struct zram *zram;
1672 struct bio_vec bv;
d7614e44 1673 unsigned long start_time;
8c7f0102 1674
98cc093c
HY
1675 if (PageTransHuge(page))
1676 return -ENOTSUPP;
8c7f0102 1677 zram = bdev->bd_disk->private_data;
08eee69f 1678
8c7f0102 1679 if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1680 atomic64_inc(&zram->stats.invalid_io);
ae85a807 1681 ret = -EINVAL;
a09759ac 1682 goto out;
8c7f0102 1683 }
1684
1685 index = sector >> SECTORS_PER_PAGE_SHIFT;
4ca82dab 1686 offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
8c7f0102 1687
1688 bv.bv_page = page;
1689 bv.bv_len = PAGE_SIZE;
1690 bv.bv_offset = 0;
1691
5f0614a5
ML
1692 start_time = bdev_start_io_acct(bdev->bd_disk->part0,
1693 SECTORS_PER_PAGE, op, jiffies);
3f289dcb 1694 ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
5f0614a5 1695 bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
08eee69f 1696out:
8c7f0102 1697 /*
1698 * If the I/O fails, just return an error (i.e., non-zero) without
1699 * calling page_endio.
1700 * The upper callers of rw_page (e.g., swap_readpage, __swap_writepage)
1701 * will then resubmit the I/O as a bio request, and bio->bi_end_io
1702 * handles the error (e.g., SetPageError, set_page_dirty and other
1703 * cleanup work).
1704 */
ae85a807
MK
1705 if (unlikely(ret < 0))
1706 return ret;
1707
1708 switch (ret) {
1709 case 0:
3f289dcb 1710 page_endio(page, op_is_write(op), 0);
ae85a807
MK
1711 break;
1712 case 1:
1713 ret = 0;
1714 break;
1715 default:
1716 WARN_ON(1);
1717 }
1718 return ret;
8c7f0102 1719}
1720
522698d7
SS
1721static void zram_reset_device(struct zram *zram)
1722{
522698d7 1723 down_write(&zram->init_lock);
9b3bb7ab 1724
522698d7
SS
1725 zram->limit_pages = 0;
1726
1727 if (!init_done(zram)) {
1728 up_write(&zram->init_lock);
1729 return;
1730 }
1731
6e017a39 1732 set_capacity_and_notify(zram->disk, 0);
8446fe92 1733 part_stat_set_all(zram->disk->part0, 0);
522698d7 1734
522698d7 1735 /* I/O operation under all of CPU are done so let's free */
6d2453c3
SS
1736 zram_meta_free(zram, zram->disksize);
1737 zram->disksize = 0;
302128dc 1738 memset(&zram->stats, 0, sizeof(zram->stats));
6d2453c3
SS
1739 zcomp_destroy(zram->comp);
1740 zram->comp = NULL;
013bf95a 1741 reset_bdev(zram);
6f163779
ML
1742
1743 up_write(&zram->init_lock);
522698d7
SS
1744}
1745
1746static ssize_t disksize_store(struct device *dev,
1747 struct device_attribute *attr, const char *buf, size_t len)
2f6a3bed 1748{
522698d7
SS
1749 u64 disksize;
1750 struct zcomp *comp;
2f6a3bed 1751 struct zram *zram = dev_to_zram(dev);
522698d7 1752 int err;
2f6a3bed 1753
522698d7
SS
1754 disksize = memparse(buf, NULL);
1755 if (!disksize)
1756 return -EINVAL;
2f6a3bed 1757
beb6602c
MK
1758 down_write(&zram->init_lock);
1759 if (init_done(zram)) {
1760 pr_info("Cannot change disksize for initialized device\n");
1761 err = -EBUSY;
1762 goto out_unlock;
1763 }
1764
522698d7 1765 disksize = PAGE_ALIGN(disksize);
beb6602c
MK
1766 if (!zram_meta_alloc(zram, disksize)) {
1767 err = -ENOMEM;
1768 goto out_unlock;
1769 }
522698d7 1770
da9556a2 1771 comp = zcomp_create(zram->compressor);
522698d7 1772 if (IS_ERR(comp)) {
70864969 1773 pr_err("Cannot initialise %s compressing backend\n",
522698d7
SS
1774 zram->compressor);
1775 err = PTR_ERR(comp);
1776 goto out_free_meta;
1777 }
1778
522698d7
SS
1779 zram->comp = comp;
1780 zram->disksize = disksize;
6e017a39 1781 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
e7ccfc4c 1782 up_write(&zram->init_lock);
522698d7
SS
1783
1784 return len;
1785
522698d7 1786out_free_meta:
beb6602c
MK
1787 zram_meta_free(zram, disksize);
1788out_unlock:
1789 up_write(&zram->init_lock);
522698d7 1790 return err;
1791}
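/*
 * Typical bring-up sketch (assumes zram0; the compressor choice is only
 * an example):
 *
 *	echo zstd > /sys/block/zram0/comp_algorithm
 *	echo 1G > /sys/block/zram0/disksize
 *	mkswap /dev/zram0 && swapon /dev/zram0
 *
 * disksize accepts memparse() suffixes and is rounded up to a multiple
 * of PAGE_SIZE before the pool and table are allocated.
 */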
1792
522698d7
SS
1793static ssize_t reset_store(struct device *dev,
1794 struct device_attribute *attr, const char *buf, size_t len)
4f2109f6 1795{
522698d7
SS
1796 int ret;
1797 unsigned short do_reset;
1798 struct zram *zram;
d666e20e 1799 struct gendisk *disk;
4f2109f6 1800
f405c445
SS
1801 ret = kstrtou16(buf, 10, &do_reset);
1802 if (ret)
1803 return ret;
1804
1805 if (!do_reset)
1806 return -EINVAL;
1807
522698d7 1808 zram = dev_to_zram(dev);
d666e20e 1809 disk = zram->disk;
4f2109f6 1810
d666e20e 1811 mutex_lock(&disk->open_mutex);
f405c445 1812 /* Do not reset an active device or claimed device */
dbdc1be3 1813 if (disk_openers(disk) || zram->claim) {
d666e20e 1814 mutex_unlock(&disk->open_mutex);
f405c445 1815 return -EBUSY;
522698d7
SS
1816 }
1817
f405c445
SS
1818 /* From now on, anyone can't open /dev/zram[0-9] */
1819 zram->claim = true;
d666e20e 1820 mutex_unlock(&disk->open_mutex);
522698d7 1821
f405c445 1822 /* Make sure all the pending I/O are finished */
d666e20e 1823 sync_blockdev(disk->part0);
522698d7 1824 zram_reset_device(zram);
522698d7 1825
d666e20e 1826 mutex_lock(&disk->open_mutex);
f405c445 1827 zram->claim = false;
d666e20e 1828 mutex_unlock(&disk->open_mutex);
f405c445 1829
522698d7 1830 return len;
f405c445
SS
1831}
1832
1833static int zram_open(struct block_device *bdev, fmode_t mode)
1834{
1835 int ret = 0;
1836 struct zram *zram;
1837
a8698707 1838 WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
f405c445
SS
1839
1840 zram = bdev->bd_disk->private_data;
1841 /* zram was claimed to reset so open request fails */
1842 if (zram->claim)
1843 ret = -EBUSY;
4f2109f6
SS
1844
1845 return ret;
1846}
1847
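/*
 * Default block device operations. The CONFIG_ZRAM_WRITEBACK variant below
 * differs only in that it does not provide .rw_page, so such requests fall
 * back to the bio path via .submit_bio.
 */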
522698d7 1848static const struct block_device_operations zram_devops = {
f405c445 1849 .open = zram_open,
c62b37d9 1850 .submit_bio = zram_submit_bio,
522698d7
SS
1851 .swap_slot_free_notify = zram_slot_free_notify,
1852 .rw_page = zram_rw_page,
1853 .owner = THIS_MODULE
1854};
1855
d422f401 1856#ifdef CONFIG_ZRAM_WRITEBACK
a8b456d0
CH
1857static const struct block_device_operations zram_wb_devops = {
1858 .open = zram_open,
1859 .submit_bio = zram_submit_bio,
1860 .swap_slot_free_notify = zram_slot_free_notify,
1861 .owner = THIS_MODULE
1862};
d422f401 1863#endif
a8b456d0 1864
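/* Per-device sysfs attributes, exposed under /sys/block/zram<id>/. */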
522698d7
SS
1865static DEVICE_ATTR_WO(compact);
1866static DEVICE_ATTR_RW(disksize);
1867static DEVICE_ATTR_RO(initstate);
1868static DEVICE_ATTR_WO(reset);
c87d1655
SS
1869static DEVICE_ATTR_WO(mem_limit);
1870static DEVICE_ATTR_WO(mem_used_max);
e82592c4 1871static DEVICE_ATTR_WO(idle);
522698d7
SS
1872static DEVICE_ATTR_RW(max_comp_streams);
1873static DEVICE_ATTR_RW(comp_algorithm);
013bf95a
MK
1874#ifdef CONFIG_ZRAM_WRITEBACK
1875static DEVICE_ATTR_RW(backing_dev);
a939888e 1876static DEVICE_ATTR_WO(writeback);
bb416d18 1877static DEVICE_ATTR_RW(writeback_limit);
1d69a3f8 1878static DEVICE_ATTR_RW(writeback_limit_enable);
013bf95a 1879#endif
a68eb3b6 1880
9b3bb7ab
SS
1881static struct attribute *zram_disk_attrs[] = {
1882 &dev_attr_disksize.attr,
1883 &dev_attr_initstate.attr,
1884 &dev_attr_reset.attr,
99ebbd30 1885 &dev_attr_compact.attr,
9ada9da9 1886 &dev_attr_mem_limit.attr,
461a8eee 1887 &dev_attr_mem_used_max.attr,
e82592c4 1888 &dev_attr_idle.attr,
beca3ec7 1889 &dev_attr_max_comp_streams.attr,
e46b8a03 1890 &dev_attr_comp_algorithm.attr,
013bf95a
MK
1891#ifdef CONFIG_ZRAM_WRITEBACK
1892 &dev_attr_backing_dev.attr,
a939888e 1893 &dev_attr_writeback.attr,
bb416d18 1894 &dev_attr_writeback_limit.attr,
1d69a3f8 1895 &dev_attr_writeback_limit_enable.attr,
013bf95a 1896#endif
2f6a3bed 1897 &dev_attr_io_stat.attr,
4f2109f6 1898 &dev_attr_mm_stat.attr,
23eddf39
MK
1899#ifdef CONFIG_ZRAM_WRITEBACK
1900 &dev_attr_bd_stat.attr,
1901#endif
623e47fc 1902 &dev_attr_debug_stat.attr,
9b3bb7ab
SS
1903 NULL,
1904};
1905
7f0d2672 1906ATTRIBUTE_GROUPS(zram_disk);
98af4d4d 1907
92ff1528
SS
1908/*
1909 * Allocate and initialize a new zram device. The function returns
1910 * a '>= 0' device_id upon success, and a negative value otherwise.
1911 */
1912static int zram_add(void)
306b0c95 1913{
85508ec6 1914 struct zram *zram;
92ff1528 1915 int ret, device_id;
85508ec6
SS
1916
1917 zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1918 if (!zram)
1919 return -ENOMEM;
1920
92ff1528 1921 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
85508ec6
SS
1922 if (ret < 0)
1923 goto out_free_dev;
92ff1528 1924 device_id = ret;
de1a21a0 1925
0900beae 1926 init_rwsem(&zram->init_lock);
1d69a3f8
MK
1927#ifdef CONFIG_ZRAM_WRITEBACK
1928 spin_lock_init(&zram->wb_limit_lock);
1929#endif
306b0c95 1930
85508ec6 1931 /* gendisk structure */
7681750b 1932 zram->disk = blk_alloc_disk(NUMA_NO_NODE);
f1e3cfff 1933 if (!zram->disk) {
70864969 1934 pr_err("Error allocating disk structure for device %d\n",
306b0c95 1935 device_id);
201c7b72 1936 ret = -ENOMEM;
7681750b 1937 goto out_free_idr;
306b0c95
NG
1938 }
1939
f1e3cfff
NG
1940 zram->disk->major = zram_major;
1941 zram->disk->first_minor = device_id;
7681750b 1942 zram->disk->minors = 1;
1ebe2e5f 1943 zram->disk->flags |= GENHD_FL_NO_PART;
f1e3cfff 1944 zram->disk->fops = &zram_devops;
f1e3cfff
NG
1945 zram->disk->private_data = zram;
1946 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
306b0c95 1947
33863c21 1948 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
f1e3cfff 1949 set_capacity(zram->disk, 0);
b67d1ec1 1950 /* zram devices sort of resemble non-rotational disks */
8b904b5b
BVA
1951 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1952 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
e447a015 1953
a1dd52af
NG
1954 /*
1955 * To ensure that we always get PAGE_SIZE aligned
1956 * and n*PAGE_SIZE sized I/O requests.
1957 */
f1e3cfff 1958 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
7b19b8d4
RJ
1959 blk_queue_logical_block_size(zram->disk->queue,
1960 ZRAM_LOGICAL_BLOCK_SIZE);
f1e3cfff
NG
1961 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1962 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
f4659d8e 1963 zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
2bb4cd5c 1964 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
31edeacd 1965
f4659d8e
JK
1966 /*
1967 * zram_bio_discard() will clear all logical blocks if the logical block
1968 * size is identical to the physical block size (PAGE_SIZE). But if they
1969 * differ, we skip discarding the parts of logical blocks in the
1970 * request range that are not aligned to the physical block
1971 * size. So we cannot ensure that all discarded logical blocks are
1972 * zeroed.
1973 */
1974 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
31edeacd 1975 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
5d83d5a0 1976
37887783 1977 blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
7f0d2672 1978 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
5e2e1cc4
LC
1979 if (ret)
1980 goto out_cleanup_disk;
98af4d4d 1981
e46b8a03 1982 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
d12b63c9 1983
c0265342 1984 zram_debugfs_register(zram);
d12b63c9 1985 pr_info("Added device: %s\n", zram->disk->disk_name);
92ff1528 1986 return device_id;
de1a21a0 1987
5e2e1cc4 1988out_cleanup_disk:
8b9ab626 1989 put_disk(zram->disk);
85508ec6
SS
1990out_free_idr:
1991 idr_remove(&zram_index_idr, device_id);
1992out_free_dev:
1993 kfree(zram);
de1a21a0 1994 return ret;
306b0c95
NG
1995}
1996
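/*
 * Remove a device: fails with -EBUSY while the device is still open. If
 * reset_store() already claimed the device, del_gendisk() below waits for
 * it to finish; otherwise pending I/O is synced and the device is reset
 * here before the gendisk is deleted and freed.
 */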
6566d1a3 1997static int zram_remove(struct zram *zram)
306b0c95 1998{
8c54499a 1999 bool claimed;
6566d1a3 2000
7a86d6dc 2001 mutex_lock(&zram->disk->open_mutex);
dbdc1be3 2002 if (disk_openers(zram->disk)) {
7a86d6dc 2003 mutex_unlock(&zram->disk->open_mutex);
6566d1a3
SS
2004 return -EBUSY;
2005 }
2006
8c54499a
ML
2007 claimed = zram->claim;
2008 if (!claimed)
2009 zram->claim = true;
7a86d6dc 2010 mutex_unlock(&zram->disk->open_mutex);
6566d1a3 2011
c0265342 2012 zram_debugfs_unregister(zram);
306b0c95 2013
8c54499a
ML
2014 if (claimed) {
2015 /*
2016 * If we were claimed by reset_store(), del_gendisk() will
2017 * wait until reset_store() is done, so there is nothing to do.
2018 */
2019 ;
2020 } else {
2021 /* Make sure all the pending I/O are finished */
7a86d6dc 2022 sync_blockdev(zram->disk->part0);
8c54499a
ML
2023 zram_reset_device(zram);
2024 }
6566d1a3
SS
2025
2026 pr_info("Removed device: %s\n", zram->disk->disk_name);
2027
85508ec6 2028 del_gendisk(zram->disk);
8c54499a
ML
2029
2030 /* del_gendisk drains pending reset_store */
2031 WARN_ON_ONCE(claimed && zram->claim);
2032
5a4b6536
ML
2033 /*
2034 * disksize_store() may be called in between zram_reset_device()
2035 * and del_gendisk(), so run the last reset to avoid leaking
2036 * anything allocated with disksize_store().
2037 */
2038 zram_reset_device(zram);
2039
8b9ab626 2040 put_disk(zram->disk);
85508ec6 2041 kfree(zram);
6566d1a3
SS
2042 return 0;
2043}
2044
2045/* zram-control sysfs attributes */
27104a53
GKH
2046
2047/*
2048 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
2049 * sense that reading from this file does alter the state of your system -- it
2050 * creates a new uninitialized zram device and returns this device's
2051 * device_id (or an error code if it fails to create a new device).
2052 */
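/*
 * Example (root only, the attribute is mode 0400):
 * "cat /sys/class/zram-control/hot_add" prints the id of the newly created
 * device.
 */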
6566d1a3
SS
2053static ssize_t hot_add_show(struct class *class,
2054 struct class_attribute *attr,
2055 char *buf)
2056{
2057 int ret;
2058
2059 mutex_lock(&zram_index_mutex);
2060 ret = zram_add();
2061 mutex_unlock(&zram_index_mutex);
2062
2063 if (ret < 0)
2064 return ret;
2065 return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2066}
853eab68
WM
2067static struct class_attribute class_attr_hot_add =
2068 __ATTR(hot_add, 0400, hot_add_show, NULL);
6566d1a3
SS
2069
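/*
 * Writing a device id removes that device, e.g.:
 *
 *	echo 4 > /sys/class/zram-control/hot_remove
 *
 * The write fails with -EBUSY if the device is still open, or -ENODEV if
 * no device with that id exists.
 */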
2070static ssize_t hot_remove_store(struct class *class,
2071 struct class_attribute *attr,
2072 const char *buf,
2073 size_t count)
2074{
2075 struct zram *zram;
2076 int ret, dev_id;
2077
2078 /* dev_id is gendisk->first_minor, which is `int' */
2079 ret = kstrtoint(buf, 10, &dev_id);
2080 if (ret)
2081 return ret;
2082 if (dev_id < 0)
2083 return -EINVAL;
2084
2085 mutex_lock(&zram_index_mutex);
2086
2087 zram = idr_find(&zram_index_idr, dev_id);
17ec4cd9 2088 if (zram) {
6566d1a3 2089 ret = zram_remove(zram);
529e71e1
TI
2090 if (!ret)
2091 idr_remove(&zram_index_idr, dev_id);
17ec4cd9 2092 } else {
6566d1a3 2093 ret = -ENODEV;
17ec4cd9 2094 }
6566d1a3
SS
2095
2096 mutex_unlock(&zram_index_mutex);
2097 return ret ? ret : count;
85508ec6 2098}
27104a53 2099static CLASS_ATTR_WO(hot_remove);
a096cafc 2100
27104a53
GKH
2101static struct attribute *zram_control_class_attrs[] = {
2102 &class_attr_hot_add.attr,
2103 &class_attr_hot_remove.attr,
2104 NULL,
6566d1a3 2105};
27104a53 2106ATTRIBUTE_GROUPS(zram_control_class);
6566d1a3
SS
2107
2108static struct class zram_control_class = {
2109 .name = "zram-control",
2110 .owner = THIS_MODULE,
27104a53 2111 .class_groups = zram_control_class_groups,
6566d1a3
SS
2112};
2113
85508ec6
SS
2114static int zram_remove_cb(int id, void *ptr, void *data)
2115{
8c54499a 2116 WARN_ON_ONCE(zram_remove(ptr));
85508ec6
SS
2117 return 0;
2118}
a096cafc 2119
85508ec6
SS
2120static void destroy_devices(void)
2121{
6566d1a3 2122 class_unregister(&zram_control_class);
85508ec6 2123 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
c0265342 2124 zram_debugfs_destroy();
85508ec6 2125 idr_destroy(&zram_index_idr);
a096cafc 2126 unregister_blkdev(zram_major, "zram");
1dd6c834 2127 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
306b0c95
NG
2128}
2129
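/*
 * Module init: register the CPU hotplug state used for the per-CPU
 * compression streams, the zram-control class and the "zram" block major,
 * then pre-create num_devices devices (hot_add can create more at runtime).
 */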
f1e3cfff 2130static int __init zram_init(void)
306b0c95 2131{
92ff1528 2132 int ret;
306b0c95 2133
1dd6c834
AMG
2134 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2135 zcomp_cpu_up_prepare, zcomp_cpu_dead);
2136 if (ret < 0)
2137 return ret;
2138
6566d1a3
SS
2139 ret = class_register(&zram_control_class);
2140 if (ret) {
70864969 2141 pr_err("Unable to register zram-control class\n");
1dd6c834 2142 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
6566d1a3
SS
2143 return ret;
2144 }
2145
c0265342 2146 zram_debugfs_create();
f1e3cfff
NG
2147 zram_major = register_blkdev(0, "zram");
2148 if (zram_major <= 0) {
70864969 2149 pr_err("Unable to get major number\n");
6566d1a3 2150 class_unregister(&zram_control_class);
1dd6c834 2151 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
a096cafc 2152 return -EBUSY;
306b0c95
NG
2153 }
2154
92ff1528 2155 while (num_devices != 0) {
6566d1a3 2156 mutex_lock(&zram_index_mutex);
92ff1528 2157 ret = zram_add();
6566d1a3 2158 mutex_unlock(&zram_index_mutex);
92ff1528 2159 if (ret < 0)
a096cafc 2160 goto out_error;
92ff1528 2161 num_devices--;
de1a21a0
NG
2162 }
2163
306b0c95 2164 return 0;
de1a21a0 2165
a096cafc 2166out_error:
85508ec6 2167 destroy_devices();
306b0c95
NG
2168 return ret;
2169}
2170
f1e3cfff 2171static void __exit zram_exit(void)
306b0c95 2172{
85508ec6 2173 destroy_devices();
306b0c95
NG
2174}
2175
f1e3cfff
NG
2176module_init(zram_init);
2177module_exit(zram_exit);
306b0c95 2178
9b3bb7ab 2179module_param(num_devices, uint, 0);
c3cdb40e 2180MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
9b3bb7ab 2181
306b0c95
NG
2182MODULE_LICENSE("Dual BSD/GPL");
2183MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
f1e3cfff 2184MODULE_DESCRIPTION("Compressed RAM Block Device");