zram: directly call zram_read_page in writeback_store
[linux-block.git] drivers/block/zram/zram_drv.c
/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to a size equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;

static void zram_free_page(struct zram *zram, size_t index);
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *bio, bool partial_io);

static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#define ZRAM_PARTIAL_IO		1
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
{
	prio &= ZRAM_COMP_PRIORITY_MASK;
	/*
	 * Clear previous priority value first, in case if we recompress
	 * further an already recompressed page
	 */
	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
				      ZRAM_COMP_PRIORITY_BIT1);
	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
}

static inline u32 zram_get_priority(struct zram *zram, u32 index)
{
	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;

	return prio & ZRAM_COMP_PRIORITY_MASK;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		if (cur_max >= pages)
			return;
	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
					  &cur_max, pages));
}

static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

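/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo 1G > /sys/block/zram0/mem_limit   - cap zsmalloc memory usage
 *   echo 0 > /sys/block/zram0/mem_limit    - disable the limit
 */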
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

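/*
 * Writing any value other than "0" is rejected; "echo 0 > mem_used_max"
 * resets the peak memory counter to the pool's current size.
 */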
static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
		 * See the comment in writeback_store.
		 */
		zram_slot_lock(zram, index);
		if (zram_allocated(zram, index) &&
				!zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
			is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
#endif
			if (is_idle)
				zram_set_flag(zram, index, ZRAM_IDLE);
		}
		zram_slot_unlock(zram, index);
	}
}

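/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo all > /sys/block/zram0/idle     - mark every allocated slot idle
 *   echo 86400 > /sys/block/zram0/idle   - with CONFIG_ZRAM_MEMORY_TRACKING,
 *                                          mark slots idle that were not
 *                                          accessed for at least a day
 */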
static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff_time = 0;
	ssize_t rv = -EINVAL;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u64 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
			cutoff_time = ktime_sub(ktime_get_boottime(),
					ns_to_ktime(age_sec * NSEC_PER_SEC));
		else
			goto out;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram))
		goto out_unlock;

	/*
	 * A cutoff_time of 0 marks everything as idle, this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff_time);
	rv = len;

out_unlock:
	up_read(&zram->init_lock);
out:
	return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
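/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo 1 > /sys/block/zram0/writeback_limit_enable
 *   echo <nr_of_4K_chunks> > /sys/block/zram0/writeback_limit
 * The writeback budget is accounted in 4KiB chunks regardless of PAGE_SIZE.
 */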
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram->backing_dev)
		return;

	bdev = zram->bdev;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flush all of IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

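/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo /dev/sda5 > /sys/block/zram0/backing_dev
 * Only block devices are accepted, and the backing device must be set up
 * before the disksize is configured.
 */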
static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Support only block device in this moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = blkdev_get_by_dev(inode->i_rdev,
			FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (IS_ERR(bdev)) {
		err = PTR_ERR(bdev);
		bdev = NULL;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

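/*
 * Allocate/free one page-sized slot in the backing device bitmap.
 * Slot 0 is intentionally never handed out (see the comment below).
 */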
static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0 so a block index can't be confused with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
			GFP_NOIO);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent)
		bio->bi_end_io = zram_page_end_io;
	else
		bio_chain(bio, parent);

	submit_bio(bio);
	return 1;
}

#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK			0
#define HUGE_WRITEBACK			(1<<0)
#define IDLE_WRITEBACK			(1<<1)
#define INCOMPRESSIBLE_WRITEBACK	(1<<2)

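/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo idle > /sys/block/zram0/writeback
 *   echo huge_idle > /sys/block/zram0/writeback
 *   echo incompressible > /sys/block/zram0/writeback
 *   echo page_index=1251 > /sys/block/zram0/writeback
 */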
static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode, err;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "huge_idle"))
		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "incompressible"))
		mode = INCOMPRESSIBLE_WRITEBACK;
	else {
		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
			return -EINVAL;

		if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
				index >= nr_pages)
			return -EINVAL;

		nr_pages = 1;
		mode = PAGE_WRITEBACK;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	for (; nr_pages != 0; index++, nr_pages--) {
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
				zram_test_flag(zram, index, ZRAM_SAME) ||
				zram_test_flag(zram, index, ZRAM_UNDER_WB))
			goto next;

		if (mode & IDLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		if (mode & INCOMPRESSIBLE_WRITEBACK &&
		    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
			goto next;

		/*
		 * Clearing ZRAM_UNDER_WB is duty of caller.
		 * IOW, zram_free_page never clears it.
		 */
		zram_set_flag(zram, index, ZRAM_UNDER_WB);
		/* Need for hugepage writeback racing */
		zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
		if (zram_read_page(zram, page, index, NULL, false)) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}

		bio_init(&bio, zram->bdev, &bio_vec, 1,
			 REQ_OP_WRITE | REQ_SYNC);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
		bio_add_page(&bio, page, PAGE_SIZE, 0);

		/*
		 * XXX: A single page IO would be inefficient for write
		 * but it would be not bad as starter.
		 */
		err = submit_bio_wait(&bio);
		if (err) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			/*
			 * BIO errors are not fatal, we continue and simply
			 * attempt to writeback the remaining objects (pages).
			 * At the same time we need to signal user-space that
			 * some writes (at least one, but also could be all of
			 * them) were not successful and we do so by returning
			 * the most recent BIO error.
			 */
			ret = err;
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		/*
		 * We released zram_slot_lock so need to check if the slot was
		 * changed. If there is freeing for the slot, we can catch it
		 * easily by zram_allocated.
		 * A subtle case is the slot is freed/reallocated/marked as
		 * ZRAM_IDLE again. To close the race, idle_store doesn't
		 * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
		 * Thus, we could close the race by checking ZRAM_IDLE bit.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			goto next;
		}

		zram_free_page(zram, index);
		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	__free_page(page);
release_init_lock:
	up_read(&zram->init_lock);

	return ret;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
	struct bio_vec bvec;
};

static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &zw->bvec, entry, bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we use
 * chained IO with parent IO in same context, it's a deadlock. To avoid that,
 * use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.bvec = *bvec;
	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (sync) {
		if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
			return -EIO;
		return read_from_bdev_sync(zram, bvec, entry, parent);
	}
	return read_from_bdev_async(zram, bvec, entry, parent);
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram->table[index].ac_time = ktime_get_boottime();
}

static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
			zram_get_priority(zram, index) ? 'r' : '.',
			zram_test_flag(zram, index,
				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');

		if (count <= copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams need to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
	/* Do not free statically defined compression algorithms */
	if (zram->comp_algs[prio] != default_compressor)
		kfree(zram->comp_algs[prio]);

	zram->comp_algs[prio] = alg;
}

static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
{
	ssize_t sz;

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->comp_algs[prio], buf);
	up_read(&zram->init_lock);

	return sz;
}

static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
	char *compressor;
	size_t sz;

	sz = strlen(buf);
	if (sz >= CRYPTO_MAX_ALG_NAME)
		return -E2BIG;

	compressor = kstrdup(buf, GFP_KERNEL);
	if (!compressor)
		return -ENOMEM;

	/* ignore trailing newline */
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor)) {
		kfree(compressor);
		return -EINVAL;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		kfree(compressor);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	comp_algorithm_set(zram, prio, compressor);
	up_write(&zram->init_lock);
	return 0;
}

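/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   cat /sys/block/zram0/comp_algorithm      - selected algorithm shown in []
 *   echo lz4 > /sys/block/zram0/comp_algorithm
 * The algorithm can only be changed before the disksize is set.
 */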
static ssize_t comp_algorithm_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
}

static ssize_t comp_algorithm_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf,
				    size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
	return ret ? ret : len;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
static ssize_t recomp_algorithm_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t sz = 0;
	u32 prio;

	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
		if (!zram->comp_algs[prio])
			continue;

		sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
		sz += __comp_algorithm_show(zram, prio, buf + sz);
	}

	return sz;
}

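/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo "algo=zstd priority=1" > /sys/block/zram0/recomp_algorithm
 * registers zstd as the secondary (priority 1) compression algorithm.
 */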
static ssize_t recomp_algorithm_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf,
				      size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	int prio = ZRAM_SECONDARY_COMP;
	char *args, *param, *val;
	char *alg = NULL;
	int ret;

	args = skip_spaces(buf);
	while (*args) {
		args = next_arg(args, &param, &val);

		if (!val || !*val)
			return -EINVAL;

		if (!strcmp(param, "algo")) {
			alg = val;
			continue;
		}

		if (!strcmp(param, "priority")) {
			ret = kstrtoint(val, 10, &prio);
			if (ret)
				return ret;
			continue;
		}
	}

	if (!alg)
		return -EINVAL;

	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
		return -EINVAL;

	ret = __comp_algorithm_store(zram, prio, alg);
	return ret ? ret : len;
}
#endif

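/* Any write (e.g. "echo 1 > /sys/block/zram0/compact") triggers pool compaction. */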
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu 0 %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

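/*
 * mm_stat columns (in order): orig_data_size compr_data_size mem_used_total
 * mem_limit mem_used_max same_pages pages_compacted huge_pages
 * huge_pages_since
 */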
static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}

#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
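/* bd_stat columns: bd_count bd_reads bd_writes, all reported in 4KiB units */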
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
		"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
	return true;
}

/*
 * To protect concurrent access to the same index entry,
 * caller should hold this table index entry's bit_spinlock to
 * indicate this index entry is accessing.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_MEMORY_TRACKING
	zram->table[index].ac_time = 0;
#endif
	if (zram_test_flag(zram, index, ZRAM_IDLE))
		zram_clear_flag(zram, index, ZRAM_IDLE);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
		zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);

	zram_set_priority(zram, index, 0);

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
	WARN_ON_ONCE(zram->table[index].flags &
		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}

/*
 * Reads a page from the writeback devices. Corresponding ZRAM slot
 * should be unlocked.
 */
static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
				    u32 index, struct bio *bio, bool partial_io)
{
	struct bio_vec bvec;

	bvec_set_page(&bvec, page, PAGE_SIZE, 0);
	return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
			      partial_io);
}

/*
 * Reads (decompresses if needed) a page from zspool (zsmalloc).
 * Corresponding ZRAM slot should be locked.
 */
static int zram_read_from_zspool(struct zram *zram, struct page *page,
				 u32 index)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	u32 prio;
	int ret;

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	if (size != PAGE_SIZE) {
		prio = zram_get_priority(zram, index);
		zstrm = zcomp_stream_get(zram->comps[prio]);
	}

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comps[prio]);
	}
	zs_unmap_object(zram->mem_pool, handle);
	return ret;
}

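/*
 * Read one page into @page, either from zsmalloc or, for ZRAM_WB slots,
 * from the backing device. The slot lock is taken and released internally.
 */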
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
			  struct bio *bio, bool partial_io)
{
	int ret;

	zram_slot_lock(zram, index);
	if (!zram_test_flag(zram, index, ZRAM_WB)) {
		/* Slot should be locked throughout the function call */
		ret = zram_read_from_zspool(zram, page, index);
		zram_slot_unlock(zram, index);
	} else {
		/* Slot should be unlocked before the function call */
		zram_slot_unlock(zram, index);

		ret = zram_bvec_read_from_bdev(zram, page, index, bio,
					       partial_io);
	}

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret < 0))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO);
		if (!page)
			return -ENOMEM;
	}

	ret = zram_read_page(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec))
		memcpy_to_bvec(bvec, page_address(page) + offset);
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}

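/*
 * Compress one page and store it in zsmalloc (or record it as a
 * same-element-filled page). The slot lock is only held while the
 * table entry is updated.
 */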
static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = -ENOMEM;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (comp_len >= huge_class_size)
		comp_len = PAGE_SIZE;
	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (IS_ERR_VALUE(handle))
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (IS_ERR_VALUE(handle)) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (IS_ERR_VALUE(handle))
			return PTR_ERR((void *)handle);

		if (comp_len != PAGE_SIZE)
			goto compress_again;
		/*
		 * If the page is not compressible, you need to acquire the
		 * lock and execute the code below. The zcomp_stream_get()
		 * call is needed to disable the cpu hotplug and grab the
		 * zstrm buffer back. It is necessary that the dereferencing
		 * of the zstrm variable below occurs correctly.
		 */
		zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (comp_len == PAGE_SIZE) {
		zram_set_flag(zram, index, ZRAM_HUGE);
		atomic64_inc(&zram->stats.huge_pages);
		atomic64_inc(&zram->stats.huge_pages_since);
	}

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial IO. We need to read the full page
		 * before to write the changes.
		 */
		page = alloc_page(GFP_NOIO);
		if (!page)
			return -ENOMEM;

		ret = zram_read_page(zram, page, index, bio, true);
		if (ret)
			goto out;

		memcpy_from_bvec(page_address(page) + offset, bvec);

		bvec_set_page(&vec, page, PAGE_SIZE, 0);
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}

#ifdef CONFIG_ZRAM_MULTI_COMP
/*
 * This function will decompress (unless it's ZRAM_HUGE) the page and then
 * attempt to compress it using provided compression algorithm priority
 * (which is potentially more effective).
 *
 * Corresponding ZRAM slot should be locked.
 */
static int zram_recompress(struct zram *zram, u32 index, struct page *page,
			   u32 threshold, u32 prio, u32 prio_max)
{
	struct zcomp_strm *zstrm = NULL;
	unsigned long handle_old;
	unsigned long handle_new;
	unsigned int comp_len_old;
	unsigned int comp_len_new;
	unsigned int class_index_old;
	unsigned int class_index_new;
	u32 num_recomps = 0;
	void *src, *dst;
	int ret;

	handle_old = zram_get_handle(zram, index);
	if (!handle_old)
		return -EINVAL;

	comp_len_old = zram_get_obj_size(zram, index);
	/*
	 * Do not recompress objects that are already "small enough".
	 */
	if (comp_len_old < threshold)
		return 0;

	ret = zram_read_from_zspool(zram, page, index);
	if (ret)
		return ret;

	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
	/*
	 * Iterate the secondary comp algorithms list (in order of priority)
	 * and try to recompress the page.
	 */
	for (; prio < prio_max; prio++) {
		if (!zram->comps[prio])
			continue;

		/*
		 * Skip if the object is already re-compressed with a higher
		 * priority algorithm (or same algorithm).
		 */
		if (prio <= zram_get_priority(zram, index))
			continue;

		num_recomps++;
		zstrm = zcomp_stream_get(zram->comps[prio]);
		src = kmap_atomic(page);
		ret = zcomp_compress(zstrm, src, &comp_len_new);
		kunmap_atomic(src);

		if (ret) {
			zcomp_stream_put(zram->comps[prio]);
			return ret;
		}

		class_index_new = zs_lookup_class_index(zram->mem_pool,
							comp_len_new);

		/* Continue until we make progress */
		if (class_index_new >= class_index_old ||
		    (threshold && comp_len_new >= threshold)) {
			zcomp_stream_put(zram->comps[prio]);
			continue;
		}

		/* Recompression was successful so break out */
		break;
	}

	/*
	 * We did not try to recompress, e.g. when we have only one
	 * secondary algorithm and the page is already recompressed
	 * using that algorithm
	 */
	if (!zstrm)
		return 0;

	if (class_index_new >= class_index_old) {
		/*
		 * Secondary algorithms failed to re-compress the page
		 * in a way that would save memory, mark the object as
		 * incompressible so that we will not try to compress
		 * it again.
		 *
		 * We need to make sure that all secondary algorithms have
		 * failed, so we test if the number of recompressions matches
		 * the number of active secondary algorithms.
		 */
		if (num_recomps == zram->num_active_comps - 1)
			zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
		return 0;
	}

	/* Successful recompression but above threshold */
	if (threshold && comp_len_new >= threshold)
		return 0;

	/*
	 * No direct reclaim (slow path) for handle allocation and no
	 * re-compression attempt (unlike in __zram_bvec_write()) since
	 * we already have stored that object in zsmalloc. If we cannot
	 * alloc memory for recompressed object then we bail out and
	 * simply keep the old (existing) object in zsmalloc.
	 */
	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
			       __GFP_KSWAPD_RECLAIM |
			       __GFP_NOWARN |
			       __GFP_HIGHMEM |
			       __GFP_MOVABLE);
	if (IS_ERR_VALUE(handle_new)) {
		zcomp_stream_put(zram->comps[prio]);
		return PTR_ERR((void *)handle_new);
	}

	dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
	memcpy(dst, zstrm->buffer, comp_len_new);
	zcomp_stream_put(zram->comps[prio]);

	zs_unmap_object(zram->mem_pool, handle_new);

	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle_new);
	zram_set_obj_size(zram, index, comp_len_new);
	zram_set_priority(zram, index, prio);

	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);

	return 0;
}

#define RECOMPRESS_IDLE		(1 << 0)
#define RECOMPRESS_HUGE		(1 << 1)

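/*
 * Example usage (per Documentation/admin-guide/blockdev/zram.rst):
 *   echo "type=huge_idle threshold=3000" > /sys/block/zram0/recompress
 *   echo "type=idle algo=zstd" > /sys/block/zram0/recompress
 */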
1749static ssize_t recompress_store(struct device *dev,
1750 struct device_attribute *attr,
1751 const char *buf, size_t len)
1752{
a55cf964 1753 u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
84b33bf7 1754 struct zram *zram = dev_to_zram(dev);
84b33bf7 1755 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
a55cf964
SS
1756 char *args, *param, *val, *algo = NULL;
1757 u32 mode = 0, threshold = 0;
84b33bf7
SS
1758 unsigned long index;
1759 struct page *page;
1760 ssize_t ret;
1761
1762 args = skip_spaces(buf);
1763 while (*args) {
1764 args = next_arg(args, &param, &val);
1765
df32de14 1766 if (!val || !*val)
84b33bf7
SS
1767 return -EINVAL;
1768
1769 if (!strcmp(param, "type")) {
1770 if (!strcmp(val, "idle"))
1771 mode = RECOMPRESS_IDLE;
1772 if (!strcmp(val, "huge"))
1773 mode = RECOMPRESS_HUGE;
1774 if (!strcmp(val, "huge_idle"))
1775 mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
1776 continue;
1777 }
1778
1779 if (!strcmp(param, "threshold")) {
1780 /*
1781 * We will re-compress only idle objects equal or
1782 * greater in size than watermark.
1783 */
1784 ret = kstrtouint(val, 10, &threshold);
1785 if (ret)
1786 return ret;
1787 continue;
1788 }
a55cf964
SS
1789
1790 if (!strcmp(param, "algo")) {
1791 algo = val;
1792 continue;
1793 }
84b33bf7
SS
1794 }
1795
1796 if (threshold >= PAGE_SIZE)
1797 return -EINVAL;
1798
1799 down_read(&zram->init_lock);
1800 if (!init_done(zram)) {
1801 ret = -EINVAL;
1802 goto release_init_lock;
1803 }
1804
a55cf964
SS
1805 if (algo) {
1806 bool found = false;
1807
1808 for (; prio < ZRAM_MAX_COMPS; prio++) {
1809 if (!zram->comp_algs[prio])
1810 continue;
1811
1812 if (!strcmp(zram->comp_algs[prio], algo)) {
1813 prio_max = min(prio + 1, ZRAM_MAX_COMPS);
1814 found = true;
1815 break;
1816 }
1817 }
1818
1819 if (!found) {
1820 ret = -EINVAL;
1821 goto release_init_lock;
1822 }
1823 }
1824
84b33bf7
SS
1825 page = alloc_page(GFP_KERNEL);
1826 if (!page) {
1827 ret = -ENOMEM;
1828 goto release_init_lock;
1829 }
1830
1831 ret = len;
1832 for (index = 0; index < nr_pages; index++) {
1833 int err = 0;
1834
1835 zram_slot_lock(zram, index);
1836
1837 if (!zram_allocated(zram, index))
1838 goto next;
1839
1840 if (mode & RECOMPRESS_IDLE &&
1841 !zram_test_flag(zram, index, ZRAM_IDLE))
1842 goto next;
1843
1844 if (mode & RECOMPRESS_HUGE &&
1845 !zram_test_flag(zram, index, ZRAM_HUGE))
1846 goto next;
1847
1848 if (zram_test_flag(zram, index, ZRAM_WB) ||
1849 zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
1850 zram_test_flag(zram, index, ZRAM_SAME) ||
1851 zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
1852 goto next;
1853
1854 err = zram_recompress(zram, index, page, threshold,
a55cf964 1855 prio, prio_max);
84b33bf7
SS
1856next:
1857 zram_slot_unlock(zram, index);
1858 if (err) {
1859 ret = err;
1860 break;
1861 }
1862
1863 cond_resched();
1864 }
1865
1866 __free_page(page);
1867
1868release_init_lock:
1869 up_read(&zram->init_lock);
1870 return ret;
1871}
1872#endif
1873
0120dd6e 1874static void zram_bio_discard(struct zram *zram, struct bio *bio)
f4659d8e
JK
1875{
1876 size_t n = bio->bi_iter.bi_size;
0120dd6e
CH
1877 u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1878 u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
1879 SECTOR_SHIFT;
f4659d8e
JK
1880
1881 /*
 1882 * zram manages data in physical block size units. Because the logical
 1883 * block size isn't identical to the physical block size on some
 1884 * architectures, we could get a discard request pointing to a specific
 1885 * offset within a certain physical block. Although we could handle such
 1886 * a request by reading that physical block, decompressing, partially
 1887 * zeroing, re-compressing and then re-storing it, this isn't reasonable
 1888 * because our intent with a discard request is to save memory. So
 1889 * skipping this logical block is appropriate here.
1890 */
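	/*
	 * For example, with PAGE_SIZE == 4096 a discard covering bytes
	 * [1024, 9216) frees only the fully covered page at [4096, 8192);
	 * the partially covered head and tail pages are left intact.
	 */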
1891 if (offset) {
38515c73 1892 if (n <= (PAGE_SIZE - offset))
f4659d8e
JK
1893 return;
1894
38515c73 1895 n -= (PAGE_SIZE - offset);
f4659d8e
JK
1896 index++;
1897 }
1898
1899 while (n >= PAGE_SIZE) {
86c49814 1900 zram_slot_lock(zram, index);
f4659d8e 1901 zram_free_page(zram, index);
86c49814 1902 zram_slot_unlock(zram, index);
015254da 1903 atomic64_inc(&zram->stats.notify_free);
f4659d8e
JK
1904 index++;
1905 n -= PAGE_SIZE;
1906 }
0120dd6e
CH
1907
1908 bio_endio(bio);
f4659d8e
JK
1909}
1910
82ca875d 1911static void zram_bio_read(struct zram *zram, struct bio *bio)
9b3bb7ab 1912{
82ca875d
CH
1913 struct bvec_iter iter;
1914 struct bio_vec bv;
1915 unsigned long start_time;
9b3bb7ab 1916
82ca875d
CH
1917 start_time = bio_start_io_acct(bio);
1918 bio_for_each_segment(bv, bio, iter) {
1919 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1920 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
1921 SECTOR_SHIFT;
1922
1923 if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
57de7bd8 1924 atomic64_inc(&zram->stats.failed_reads);
82ca875d
CH
1925 bio->bi_status = BLK_STS_IOERR;
1926 break;
57de7bd8 1927 }
82ca875d 1928 flush_dcache_page(bv.bv_page);
9b3bb7ab 1929
82ca875d
CH
1930 zram_slot_lock(zram, index);
1931 zram_accessed(zram, index);
1932 zram_slot_unlock(zram, index);
1933 }
1934 bio_end_io_acct(bio, start_time);
1935 bio_endio(bio);
8c921b2b
JM
1936}
1937
82ca875d 1938static void zram_bio_write(struct zram *zram, struct bio *bio)
8c921b2b 1939{
7988613b 1940 struct bvec_iter iter;
af8b04c6 1941 struct bio_vec bv;
d7614e44 1942 unsigned long start_time;
8c921b2b 1943
d7614e44 1944 start_time = bio_start_io_acct(bio);
af8b04c6
CH
1945 bio_for_each_segment(bv, bio, iter) {
1946 u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1947 u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
1948 SECTOR_SHIFT;
1949
82ca875d
CH
1950 if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
1951 atomic64_inc(&zram->stats.failed_writes);
af8b04c6
CH
1952 bio->bi_status = BLK_STS_IOERR;
1953 break;
1954 }
82ca875d
CH
1955
1956 zram_slot_lock(zram, index);
1957 zram_accessed(zram, index);
1958 zram_slot_unlock(zram, index);
a1dd52af 1959 }
d7614e44 1960 bio_end_io_acct(bio, start_time);
4246a0b6 1961 bio_endio(bio);
306b0c95
NG
1962}
1963
306b0c95 1964/*
f1e3cfff 1965 * Handler function for all zram I/O requests.
306b0c95 1966 */
3e08773c 1967static void zram_submit_bio(struct bio *bio)
306b0c95 1968{
309dca30 1969 struct zram *zram = bio->bi_bdev->bd_disk->private_data;
306b0c95 1970
d6eea009
CH
1971 switch (bio_op(bio)) {
1972 case REQ_OP_READ:
82ca875d
CH
1973 zram_bio_read(zram, bio);
1974 break;
d6eea009 1975 case REQ_OP_WRITE:
82ca875d 1976 zram_bio_write(zram, bio);
d6eea009
CH
1977 break;
1978 case REQ_OP_DISCARD:
1979 case REQ_OP_WRITE_ZEROES:
1980 zram_bio_discard(zram, bio);
1981 break;
1982 default:
1983 WARN_ON_ONCE(1);
1984 bio_endio(bio);
1985 }
306b0c95
NG
1986}
1987
2ccbec05
NG
1988static void zram_slot_free_notify(struct block_device *bdev,
1989 unsigned long index)
107c161b 1990{
f1e3cfff 1991 struct zram *zram;
107c161b 1992
f1e3cfff 1993 zram = bdev->bd_disk->private_data;
a0c516cb 1994
3c9959e0
MK
1995 atomic64_inc(&zram->stats.notify_free);
1996 if (!zram_slot_trylock(zram, index)) {
1997 atomic64_inc(&zram->stats.miss_free);
1998 return;
1999 }
2000
f614a9f4 2001 zram_free_page(zram, index);
86c49814 2002 zram_slot_unlock(zram, index);
107c161b
NG
2003}
2004
7ac07a26
SS
2005static void zram_destroy_comps(struct zram *zram)
2006{
2007 u32 prio;
2008
2009 for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
2010 struct zcomp *comp = zram->comps[prio];
2011
2012 zram->comps[prio] = NULL;
2013 if (!comp)
2014 continue;
2015 zcomp_destroy(comp);
a55cf964 2016 zram->num_active_comps--;
7ac07a26
SS
2017 }
2018}
2019
522698d7
SS
2020static void zram_reset_device(struct zram *zram)
2021{
522698d7 2022 down_write(&zram->init_lock);
9b3bb7ab 2023
522698d7
SS
2024 zram->limit_pages = 0;
2025
2026 if (!init_done(zram)) {
2027 up_write(&zram->init_lock);
2028 return;
2029 }
2030
6e017a39 2031 set_capacity_and_notify(zram->disk, 0);
8446fe92 2032 part_stat_set_all(zram->disk->part0, 0);
522698d7 2033
522698d7 2034 /* I/O operations on all CPUs are done, so let's free */
6d2453c3
SS
2035 zram_meta_free(zram, zram->disksize);
2036 zram->disksize = 0;
7ac07a26 2037 zram_destroy_comps(zram);
302128dc 2038 memset(&zram->stats, 0, sizeof(zram->stats));
013bf95a 2039 reset_bdev(zram);
6f163779 2040
7ac07a26 2041 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
6f163779 2042 up_write(&zram->init_lock);
522698d7
SS
2043}
2044
2045static ssize_t disksize_store(struct device *dev,
2046 struct device_attribute *attr, const char *buf, size_t len)
2f6a3bed 2047{
522698d7
SS
2048 u64 disksize;
2049 struct zcomp *comp;
2f6a3bed 2050 struct zram *zram = dev_to_zram(dev);
522698d7 2051 int err;
7ac07a26 2052 u32 prio;
2f6a3bed 2053
522698d7
SS
2054 disksize = memparse(buf, NULL);
2055 if (!disksize)
2056 return -EINVAL;
2f6a3bed 2057
beb6602c
MK
2058 down_write(&zram->init_lock);
2059 if (init_done(zram)) {
2060 pr_info("Cannot change disksize for initialized device\n");
2061 err = -EBUSY;
2062 goto out_unlock;
2063 }
2064
522698d7 2065 disksize = PAGE_ALIGN(disksize);
beb6602c
MK
2066 if (!zram_meta_alloc(zram, disksize)) {
2067 err = -ENOMEM;
2068 goto out_unlock;
2069 }
522698d7 2070
7ac07a26
SS
2071 for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
2072 if (!zram->comp_algs[prio])
2073 continue;
2074
2075 comp = zcomp_create(zram->comp_algs[prio]);
2076 if (IS_ERR(comp)) {
2077 pr_err("Cannot initialise %s compressing backend\n",
2078 zram->comp_algs[prio]);
2079 err = PTR_ERR(comp);
2080 goto out_free_comps;
2081 }
522698d7 2082
7ac07a26 2083 zram->comps[prio] = comp;
a55cf964 2084 zram->num_active_comps++;
7ac07a26 2085 }
522698d7 2086 zram->disksize = disksize;
6e017a39 2087 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
e7ccfc4c 2088 up_write(&zram->init_lock);
522698d7
SS
2089
2090 return len;
2091
7ac07a26
SS
2092out_free_comps:
2093 zram_destroy_comps(zram);
beb6602c
MK
2094 zram_meta_free(zram, disksize);
2095out_unlock:
2096 up_write(&zram->init_lock);
522698d7 2097 return err;
2f6a3bed
SS
2098}
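/*
 * Illustrative device setup from userspace (device name, size and algorithm
 * are only examples). disksize accepts memparse() suffixes, and a non-default
 * compression algorithm is typically selected before disksize is set, since
 * disksize_store() is what instantiates the compression backends:
 *
 *   echo zstd > /sys/block/zram0/comp_algorithm
 *   echo 1G > /sys/block/zram0/disksize
 */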
2099
522698d7
SS
2100static ssize_t reset_store(struct device *dev,
2101 struct device_attribute *attr, const char *buf, size_t len)
4f2109f6 2102{
522698d7
SS
2103 int ret;
2104 unsigned short do_reset;
2105 struct zram *zram;
d666e20e 2106 struct gendisk *disk;
4f2109f6 2107
f405c445
SS
2108 ret = kstrtou16(buf, 10, &do_reset);
2109 if (ret)
2110 return ret;
2111
2112 if (!do_reset)
2113 return -EINVAL;
2114
522698d7 2115 zram = dev_to_zram(dev);
d666e20e 2116 disk = zram->disk;
4f2109f6 2117
d666e20e 2118 mutex_lock(&disk->open_mutex);
f405c445 2119 /* Do not reset an active device or claimed device */
dbdc1be3 2120 if (disk_openers(disk) || zram->claim) {
d666e20e 2121 mutex_unlock(&disk->open_mutex);
f405c445 2122 return -EBUSY;
522698d7
SS
2123 }
2124
f405c445
SS
 2125 /* From now on, no one can open /dev/zram[0-9] */
2126 zram->claim = true;
d666e20e 2127 mutex_unlock(&disk->open_mutex);
522698d7 2128
f405c445 2129 /* Make sure all the pending I/O are finished */
d666e20e 2130 sync_blockdev(disk->part0);
522698d7 2131 zram_reset_device(zram);
522698d7 2132
d666e20e 2133 mutex_lock(&disk->open_mutex);
f405c445 2134 zram->claim = false;
d666e20e 2135 mutex_unlock(&disk->open_mutex);
f405c445 2136
522698d7 2137 return len;
f405c445
SS
2138}
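/*
 * Illustrative reset from userspace (device name is an example). Any
 * non-zero value triggers the reset; the write fails with -EBUSY while the
 * device is still held open:
 *
 *   echo 1 > /sys/block/zram0/reset
 */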
2139
2140static int zram_open(struct block_device *bdev, fmode_t mode)
2141{
2142 int ret = 0;
2143 struct zram *zram;
2144
a8698707 2145 WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
f405c445
SS
2146
2147 zram = bdev->bd_disk->private_data;
2148 /* zram was claimed to reset so open request fails */
2149 if (zram->claim)
2150 ret = -EBUSY;
4f2109f6
SS
2151
2152 return ret;
2153}
2154
522698d7 2155static const struct block_device_operations zram_devops = {
f405c445 2156 .open = zram_open,
c62b37d9 2157 .submit_bio = zram_submit_bio,
522698d7 2158 .swap_slot_free_notify = zram_slot_free_notify,
522698d7
SS
2159 .owner = THIS_MODULE
2160};
2161
2162static DEVICE_ATTR_WO(compact);
2163static DEVICE_ATTR_RW(disksize);
2164static DEVICE_ATTR_RO(initstate);
2165static DEVICE_ATTR_WO(reset);
c87d1655
SS
2166static DEVICE_ATTR_WO(mem_limit);
2167static DEVICE_ATTR_WO(mem_used_max);
e82592c4 2168static DEVICE_ATTR_WO(idle);
522698d7
SS
2169static DEVICE_ATTR_RW(max_comp_streams);
2170static DEVICE_ATTR_RW(comp_algorithm);
013bf95a
MK
2171#ifdef CONFIG_ZRAM_WRITEBACK
2172static DEVICE_ATTR_RW(backing_dev);
a939888e 2173static DEVICE_ATTR_WO(writeback);
bb416d18 2174static DEVICE_ATTR_RW(writeback_limit);
1d69a3f8 2175static DEVICE_ATTR_RW(writeback_limit_enable);
013bf95a 2176#endif
001d9273
SS
2177#ifdef CONFIG_ZRAM_MULTI_COMP
2178static DEVICE_ATTR_RW(recomp_algorithm);
84b33bf7 2179static DEVICE_ATTR_WO(recompress);
001d9273 2180#endif
a68eb3b6 2181
9b3bb7ab
SS
2182static struct attribute *zram_disk_attrs[] = {
2183 &dev_attr_disksize.attr,
2184 &dev_attr_initstate.attr,
2185 &dev_attr_reset.attr,
99ebbd30 2186 &dev_attr_compact.attr,
9ada9da9 2187 &dev_attr_mem_limit.attr,
461a8eee 2188 &dev_attr_mem_used_max.attr,
e82592c4 2189 &dev_attr_idle.attr,
beca3ec7 2190 &dev_attr_max_comp_streams.attr,
e46b8a03 2191 &dev_attr_comp_algorithm.attr,
013bf95a
MK
2192#ifdef CONFIG_ZRAM_WRITEBACK
2193 &dev_attr_backing_dev.attr,
a939888e 2194 &dev_attr_writeback.attr,
bb416d18 2195 &dev_attr_writeback_limit.attr,
1d69a3f8 2196 &dev_attr_writeback_limit_enable.attr,
013bf95a 2197#endif
2f6a3bed 2198 &dev_attr_io_stat.attr,
4f2109f6 2199 &dev_attr_mm_stat.attr,
23eddf39
MK
2200#ifdef CONFIG_ZRAM_WRITEBACK
2201 &dev_attr_bd_stat.attr,
2202#endif
623e47fc 2203 &dev_attr_debug_stat.attr,
001d9273
SS
2204#ifdef CONFIG_ZRAM_MULTI_COMP
2205 &dev_attr_recomp_algorithm.attr,
84b33bf7 2206 &dev_attr_recompress.attr,
001d9273 2207#endif
9b3bb7ab
SS
2208 NULL,
2209};
2210
7f0d2672 2211ATTRIBUTE_GROUPS(zram_disk);
98af4d4d 2212
92ff1528
SS
2213/*
 2214 * Allocate and initialize a new zram device. The function returns
 2215 * a '>= 0' device_id upon success, and a negative value otherwise.
2216 */
2217static int zram_add(void)
306b0c95 2218{
85508ec6 2219 struct zram *zram;
92ff1528 2220 int ret, device_id;
85508ec6
SS
2221
2222 zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
2223 if (!zram)
2224 return -ENOMEM;
2225
92ff1528 2226 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
85508ec6
SS
2227 if (ret < 0)
2228 goto out_free_dev;
92ff1528 2229 device_id = ret;
de1a21a0 2230
0900beae 2231 init_rwsem(&zram->init_lock);
1d69a3f8
MK
2232#ifdef CONFIG_ZRAM_WRITEBACK
2233 spin_lock_init(&zram->wb_limit_lock);
2234#endif
306b0c95 2235
85508ec6 2236 /* gendisk structure */
7681750b 2237 zram->disk = blk_alloc_disk(NUMA_NO_NODE);
f1e3cfff 2238 if (!zram->disk) {
70864969 2239 pr_err("Error allocating disk structure for device %d\n",
306b0c95 2240 device_id);
201c7b72 2241 ret = -ENOMEM;
7681750b 2242 goto out_free_idr;
306b0c95
NG
2243 }
2244
f1e3cfff
NG
2245 zram->disk->major = zram_major;
2246 zram->disk->first_minor = device_id;
7681750b 2247 zram->disk->minors = 1;
1ebe2e5f 2248 zram->disk->flags |= GENHD_FL_NO_PART;
f1e3cfff 2249 zram->disk->fops = &zram_devops;
f1e3cfff
NG
2250 zram->disk->private_data = zram;
2251 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
306b0c95 2252
071acb30 2253 /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
f1e3cfff 2254 set_capacity(zram->disk, 0);
b67d1ec1 2255 /* zram devices sort of resemble non-rotational disks */
8b904b5b 2256 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
3222d8c2 2257 blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
8b904b5b 2258 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
e447a015 2259
a1dd52af
NG
2260 /*
 2261 * To ensure that we always get PAGE_SIZE-aligned
 2262 * and n*PAGE_SIZE-sized I/O requests.
2263 */
f1e3cfff 2264 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
7b19b8d4
RJ
2265 blk_queue_logical_block_size(zram->disk->queue,
2266 ZRAM_LOGICAL_BLOCK_SIZE);
f1e3cfff
NG
2267 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
2268 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
f4659d8e 2269 zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
2bb4cd5c 2270 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
31edeacd 2271
f4659d8e
JK
2272 /*
 2273 * zram_bio_discard() will clear all logical blocks if the logical block
 2274 * size is identical to the physical block size (PAGE_SIZE). But if they
 2275 * differ, we skip discarding the parts of logical blocks that fall in
 2276 * the portion of the request range which isn't aligned to the physical
 2277 * block size. So we can't ensure that all discarded logical blocks are
 2278 * zeroed.
2279 */
2280 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
31edeacd 2281 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
5d83d5a0 2282
37887783 2283 blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
7f0d2672 2284 ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
5e2e1cc4
LC
2285 if (ret)
2286 goto out_cleanup_disk;
98af4d4d 2287
001d9273 2288 comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
d12b63c9 2289
c0265342 2290 zram_debugfs_register(zram);
d12b63c9 2291 pr_info("Added device: %s\n", zram->disk->disk_name);
92ff1528 2292 return device_id;
de1a21a0 2293
5e2e1cc4 2294out_cleanup_disk:
8b9ab626 2295 put_disk(zram->disk);
85508ec6
SS
2296out_free_idr:
2297 idr_remove(&zram_index_idr, device_id);
2298out_free_dev:
2299 kfree(zram);
de1a21a0 2300 return ret;
306b0c95
NG
2301}
2302
6566d1a3 2303static int zram_remove(struct zram *zram)
306b0c95 2304{
8c54499a 2305 bool claimed;
6566d1a3 2306
7a86d6dc 2307 mutex_lock(&zram->disk->open_mutex);
dbdc1be3 2308 if (disk_openers(zram->disk)) {
7a86d6dc 2309 mutex_unlock(&zram->disk->open_mutex);
6566d1a3
SS
2310 return -EBUSY;
2311 }
2312
8c54499a
ML
2313 claimed = zram->claim;
2314 if (!claimed)
2315 zram->claim = true;
7a86d6dc 2316 mutex_unlock(&zram->disk->open_mutex);
6566d1a3 2317
c0265342 2318 zram_debugfs_unregister(zram);
306b0c95 2319
8c54499a
ML
2320 if (claimed) {
2321 /*
2322 * If we were claimed by reset_store(), del_gendisk() will
 2323 * wait until reset_store() is done, so there is nothing to do.
2324 */
2325 ;
2326 } else {
2327 /* Make sure all the pending I/O are finished */
7a86d6dc 2328 sync_blockdev(zram->disk->part0);
8c54499a
ML
2329 zram_reset_device(zram);
2330 }
6566d1a3
SS
2331
2332 pr_info("Removed device: %s\n", zram->disk->disk_name);
2333
85508ec6 2334 del_gendisk(zram->disk);
8c54499a
ML
2335
2336 /* del_gendisk drains pending reset_store */
2337 WARN_ON_ONCE(claimed && zram->claim);
2338
5a4b6536
ML
2339 /*
2340 * disksize_store() may be called in between zram_reset_device()
2341 * and del_gendisk(), so run the last reset to avoid leaking
 2342 * anything allocated with disksize_store().
2343 */
2344 zram_reset_device(zram);
2345
8b9ab626 2346 put_disk(zram->disk);
85508ec6 2347 kfree(zram);
6566d1a3
SS
2348 return 0;
2349}
2350
2351/* zram-control sysfs attributes */
27104a53
GKH
2352
2353/*
 2354 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in the
 2355 * sense that reading from this file does alter the state of your system -- it
 2356 * creates a new uninitialized zram device and returns that device's
 2357 * device_id (or an error code if it fails to create a new device).
2358 */
6566d1a3
SS
2359static ssize_t hot_add_show(struct class *class,
2360 struct class_attribute *attr,
2361 char *buf)
2362{
2363 int ret;
2364
2365 mutex_lock(&zram_index_mutex);
2366 ret = zram_add();
2367 mutex_unlock(&zram_index_mutex);
2368
2369 if (ret < 0)
2370 return ret;
2371 return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2372}
853eab68
WM
2373static struct class_attribute class_attr_hot_add =
2374 __ATTR(hot_add, 0400, hot_add_show, NULL);
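/*
 * Illustrative use (the printed id is an example). Reading hot_add creates
 * a new, uninitialized device and prints its id, which names the new
 * /dev/zram<id> node:
 *
 *   cat /sys/class/zram-control/hot_add
 *   2
 */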
6566d1a3
SS
2375
2376static ssize_t hot_remove_store(struct class *class,
2377 struct class_attribute *attr,
2378 const char *buf,
2379 size_t count)
2380{
2381 struct zram *zram;
2382 int ret, dev_id;
2383
2384 /* dev_id is gendisk->first_minor, which is `int' */
2385 ret = kstrtoint(buf, 10, &dev_id);
2386 if (ret)
2387 return ret;
2388 if (dev_id < 0)
2389 return -EINVAL;
2390
2391 mutex_lock(&zram_index_mutex);
2392
2393 zram = idr_find(&zram_index_idr, dev_id);
17ec4cd9 2394 if (zram) {
6566d1a3 2395 ret = zram_remove(zram);
529e71e1
TI
2396 if (!ret)
2397 idr_remove(&zram_index_idr, dev_id);
17ec4cd9 2398 } else {
6566d1a3 2399 ret = -ENODEV;
17ec4cd9 2400 }
6566d1a3
SS
2401
2402 mutex_unlock(&zram_index_mutex);
2403 return ret ? ret : count;
85508ec6 2404}
27104a53 2405static CLASS_ATTR_WO(hot_remove);
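/*
 * Illustrative removal (the id is an example). The write fails with -EBUSY
 * if the device is still open and with -ENODEV if no such device exists:
 *
 *   echo 2 > /sys/class/zram-control/hot_remove
 */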
a096cafc 2406
27104a53
GKH
2407static struct attribute *zram_control_class_attrs[] = {
2408 &class_attr_hot_add.attr,
2409 &class_attr_hot_remove.attr,
2410 NULL,
6566d1a3 2411};
27104a53 2412ATTRIBUTE_GROUPS(zram_control_class);
6566d1a3
SS
2413
2414static struct class zram_control_class = {
2415 .name = "zram-control",
2416 .owner = THIS_MODULE,
27104a53 2417 .class_groups = zram_control_class_groups,
6566d1a3
SS
2418};
2419
85508ec6
SS
2420static int zram_remove_cb(int id, void *ptr, void *data)
2421{
8c54499a 2422 WARN_ON_ONCE(zram_remove(ptr));
85508ec6
SS
2423 return 0;
2424}
a096cafc 2425
85508ec6
SS
2426static void destroy_devices(void)
2427{
6566d1a3 2428 class_unregister(&zram_control_class);
85508ec6 2429 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
c0265342 2430 zram_debugfs_destroy();
85508ec6 2431 idr_destroy(&zram_index_idr);
a096cafc 2432 unregister_blkdev(zram_major, "zram");
1dd6c834 2433 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
306b0c95
NG
2434}
2435
f1e3cfff 2436static int __init zram_init(void)
306b0c95 2437{
92ff1528 2438 int ret;
306b0c95 2439
f635725c
SS
2440 BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);
2441
1dd6c834
AMG
2442 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2443 zcomp_cpu_up_prepare, zcomp_cpu_dead);
2444 if (ret < 0)
2445 return ret;
2446
6566d1a3
SS
2447 ret = class_register(&zram_control_class);
2448 if (ret) {
70864969 2449 pr_err("Unable to register zram-control class\n");
1dd6c834 2450 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
6566d1a3
SS
2451 return ret;
2452 }
2453
c0265342 2454 zram_debugfs_create();
f1e3cfff
NG
2455 zram_major = register_blkdev(0, "zram");
2456 if (zram_major <= 0) {
70864969 2457 pr_err("Unable to get major number\n");
6566d1a3 2458 class_unregister(&zram_control_class);
1dd6c834 2459 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
a096cafc 2460 return -EBUSY;
306b0c95
NG
2461 }
2462
92ff1528 2463 while (num_devices != 0) {
6566d1a3 2464 mutex_lock(&zram_index_mutex);
92ff1528 2465 ret = zram_add();
6566d1a3 2466 mutex_unlock(&zram_index_mutex);
92ff1528 2467 if (ret < 0)
a096cafc 2468 goto out_error;
92ff1528 2469 num_devices--;
de1a21a0
NG
2470 }
2471
306b0c95 2472 return 0;
de1a21a0 2473
a096cafc 2474out_error:
85508ec6 2475 destroy_devices();
306b0c95
NG
2476 return ret;
2477}
2478
f1e3cfff 2479static void __exit zram_exit(void)
306b0c95 2480{
85508ec6 2481 destroy_devices();
306b0c95
NG
2482}
2483
f1e3cfff
NG
2484module_init(zram_init);
2485module_exit(zram_exit);
306b0c95 2486
9b3bb7ab 2487module_param(num_devices, uint, 0);
c3cdb40e 2488MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
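/*
 * Illustrative module load that pre-creates four devices (the count is an
 * example):
 *
 *   modprobe zram num_devices=4
 */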
9b3bb7ab 2489
306b0c95
NG
2490MODULE_LICENSE("Dual BSD/GPL");
2491MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
f1e3cfff 2492MODULE_DESCRIPTION("Compressed RAM Block Device");