drivers/block/zram/zram_drv.c
306b0c95 1/*
f1e3cfff 2 * Compressed RAM block device
306b0c95 3 *
1130ebba 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
7bfb3de8 5 * 2012, 2013 Minchan Kim
6 *
7 * This code is released using a dual license strategy: BSD/GPL
8 * You can choose the licence that better fits your requirements.
9 *
10 * Released under the terms of 3-clause BSD License
11 * Released under the terms of GNU General Public License Version 2.0
12 *
13 */
14
f1e3cfff 15#define KMSG_COMPONENT "zram"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/kernel.h>
8946a086 20#include <linux/bio.h>
21#include <linux/bitops.h>
22#include <linux/blkdev.h>
23#include <linux/buffer_head.h>
24#include <linux/device.h>
25#include <linux/genhd.h>
26#include <linux/highmem.h>
5a0e3ad6 27#include <linux/slab.h>
b09ab054 28#include <linux/backing-dev.h>
306b0c95 29#include <linux/string.h>
306b0c95 30#include <linux/vmalloc.h>
fcfa8d95 31#include <linux/err.h>
85508ec6 32#include <linux/idr.h>
6566d1a3 33#include <linux/sysfs.h>
c0265342 34#include <linux/debugfs.h>
1dd6c834 35#include <linux/cpuhotplug.h>
306b0c95 36
16a4bfb9 37#include "zram_drv.h"
306b0c95 38
85508ec6 39static DEFINE_IDR(zram_index_idr);
40/* idr index must be protected */
41static DEFINE_MUTEX(zram_index_mutex);
42
f1e3cfff 43static int zram_major;
b7ca232e 44static const char *default_compressor = "lzo";
306b0c95 45
306b0c95 46/* Module params (documentation at end) */
ca3d70bd 47static unsigned int num_devices = 1;
48/*
 49 * Pages that compress to sizes equal to or greater than this are stored
50 * uncompressed in memory.
51 */
52static size_t huge_class_size;
33863c21 53
1f7319c7 54static void zram_free_page(struct zram *zram, size_t index);
55static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
56 u32 index, int offset, struct bio *bio);
57
1f7319c7 58
59static int zram_slot_trylock(struct zram *zram, u32 index)
60{
7e529283 61 return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
62}
63
64static void zram_slot_lock(struct zram *zram, u32 index)
65{
7e529283 66 bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
67}
68
69static void zram_slot_unlock(struct zram *zram, u32 index)
70{
7e529283 71 bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
72}
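
/*
 * Note: ZRAM_LOCK is just one bit in the per-slot flags word, so the
 * helpers above give every table entry its own bit spinlock without
 * needing any extra per-slot storage.
 */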
73
08eee69f 74static inline bool init_done(struct zram *zram)
be2d1d56 75{
08eee69f 76 return zram->disksize;
77}
78
79static inline struct zram *dev_to_zram(struct device *dev)
80{
81 return (struct zram *)dev_to_disk(dev)->private_data;
82}
83
84static unsigned long zram_get_handle(struct zram *zram, u32 index)
85{
86 return zram->table[index].handle;
87}
88
89static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
90{
91 zram->table[index].handle = handle;
92}
93
b31177f2 94/* flag operations require table entry bit_spin_lock() being held */
c0265342 95static bool zram_test_flag(struct zram *zram, u32 index,
522698d7 96 enum zram_pageflags flag)
99ebbd30 97{
7e529283 98 return zram->table[index].flags & BIT(flag);
522698d7 99}
99ebbd30 100
beb6602c 101static void zram_set_flag(struct zram *zram, u32 index,
102 enum zram_pageflags flag)
103{
7e529283 104 zram->table[index].flags |= BIT(flag);
522698d7 105}
99ebbd30 106
beb6602c 107static void zram_clear_flag(struct zram *zram, u32 index,
108 enum zram_pageflags flag)
109{
7e529283 110 zram->table[index].flags &= ~BIT(flag);
522698d7 111}
99ebbd30 112
beb6602c 113static inline void zram_set_element(struct zram *zram, u32 index,
8e19d540 114 unsigned long element)
115{
beb6602c 116 zram->table[index].element = element;
8e19d540 117}
118
643ae61d 119static unsigned long zram_get_element(struct zram *zram, u32 index)
8e19d540 120{
643ae61d 121 return zram->table[index].element;
8e19d540 122}
123
beb6602c 124static size_t zram_get_obj_size(struct zram *zram, u32 index)
522698d7 125{
7e529283 126 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
127}
128
beb6602c 129static void zram_set_obj_size(struct zram *zram,
522698d7 130 u32 index, size_t size)
9b3bb7ab 131{
7e529283 132 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;
9b3bb7ab 133
7e529283 134 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
135}
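
/*
 * The per-slot flags word is shared: the low ZRAM_FLAG_SHIFT bits hold
 * the compressed object size (set/read above) and the bits above that
 * hold the zram_pageflags. For example, if ZRAM_FLAG_SHIFT were 24, any
 * object size below 16MB would fit under the flag bits; the actual value
 * is defined in zram_drv.h.
 */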
136
137static inline bool zram_allocated(struct zram *zram, u32 index)
138{
139 return zram_get_obj_size(zram, index) ||
140 zram_test_flag(zram, index, ZRAM_SAME) ||
141 zram_test_flag(zram, index, ZRAM_WB);
142}
143
1f7319c7 144#if PAGE_SIZE != 4096
1c53e0d2 145static inline bool is_partial_io(struct bio_vec *bvec)
146{
147 return bvec->bv_len != PAGE_SIZE;
148}
149#else
150static inline bool is_partial_io(struct bio_vec *bvec)
151{
152 return false;
153}
154#endif
155
156/*
157 * Check if request is within bounds and aligned on zram logical blocks.
158 */
1c53e0d2 159static inline bool valid_io_request(struct zram *zram,
160 sector_t start, unsigned int size)
161{
162 u64 end, bound;
163
164 /* unaligned request */
165 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
1c53e0d2 166 return false;
522698d7 167 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
1c53e0d2 168 return false;
169
170 end = start + (size >> SECTOR_SHIFT);
171 bound = zram->disksize >> SECTOR_SHIFT;
 172 /* out of range */
173 if (unlikely(start >= bound || end > bound || start > end))
1c53e0d2 174 return false;
175
176 /* I/O request is valid */
1c53e0d2 177 return true;
178}
179
180static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
181{
e86942c7 182 *index += (*offset + bvec->bv_len) / PAGE_SIZE;
183 *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
184}
185
186static inline void update_used_max(struct zram *zram,
187 const unsigned long pages)
188{
189 unsigned long old_max, cur_max;
190
191 old_max = atomic_long_read(&zram->stats.max_used_pages);
192
193 do {
194 cur_max = old_max;
195 if (pages > cur_max)
196 old_max = atomic_long_cmpxchg(
197 &zram->stats.max_used_pages, cur_max, pages);
198 } while (old_max != cur_max);
199}
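
/*
 * update_used_max() is a lock-free maximum update: atomic_long_cmpxchg()
 * installs the new page count only while the stat still equals the value
 * we sampled, and the loop retries if another CPU raced in between.
 */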
200
48ad1abe 201static inline void zram_fill_page(void *ptr, unsigned long len,
8e19d540 202 unsigned long value)
203{
8e19d540 204 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
48ad1abe 205 memset_l(ptr, value, len / sizeof(unsigned long));
8e19d540 206}
207
208static bool page_same_filled(void *ptr, unsigned long *element)
209{
210 unsigned int pos;
211 unsigned long *page;
f0fe9984 212 unsigned long val;
213
214 page = (unsigned long *)ptr;
f0fe9984 215 val = page[0];
522698d7 216
217 for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
218 if (val != page[pos])
1c53e0d2 219 return false;
220 }
221
f0fe9984 222 *element = val;
8e19d540 223
1c53e0d2 224 return true;
225}
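
/*
 * page_same_filled() detects pages whose content is one unsigned long
 * value repeated across the whole page (a zero-filled page being the
 * common case). Such pages are stored as ZRAM_SAME slots that carry only
 * the value itself, so no zsmalloc object is allocated for them.
 */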
226
227static ssize_t initstate_show(struct device *dev,
228 struct device_attribute *attr, char *buf)
229{
a68eb3b6 230 u32 val;
231 struct zram *zram = dev_to_zram(dev);
232
233 down_read(&zram->init_lock);
234 val = init_done(zram);
235 up_read(&zram->init_lock);
9b3bb7ab 236
56b4e8cb 237 return scnprintf(buf, PAGE_SIZE, "%u\n", val);
238}
239
240static ssize_t disksize_show(struct device *dev,
241 struct device_attribute *attr, char *buf)
242{
243 struct zram *zram = dev_to_zram(dev);
244
245 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
246}
247
248static ssize_t mem_limit_store(struct device *dev,
249 struct device_attribute *attr, const char *buf, size_t len)
250{
251 u64 limit;
252 char *tmp;
253 struct zram *zram = dev_to_zram(dev);
254
255 limit = memparse(buf, &tmp);
256 if (buf == tmp) /* no chars parsed, invalid input */
257 return -EINVAL;
258
259 down_write(&zram->init_lock);
260 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
261 up_write(&zram->init_lock);
262
263 return len;
264}
265
266static ssize_t mem_used_max_store(struct device *dev,
267 struct device_attribute *attr, const char *buf, size_t len)
268{
269 int err;
270 unsigned long val;
271 struct zram *zram = dev_to_zram(dev);
272
273 err = kstrtoul(buf, 10, &val);
274 if (err || val != 0)
275 return -EINVAL;
276
277 down_read(&zram->init_lock);
5a99e95b 278 if (init_done(zram)) {
461a8eee 279 atomic_long_set(&zram->stats.max_used_pages,
beb6602c 280 zs_get_total_pages(zram->mem_pool));
5a99e95b 281 }
282 up_read(&zram->init_lock);
283
284 return len;
285}
286
287static ssize_t idle_store(struct device *dev,
288 struct device_attribute *attr, const char *buf, size_t len)
289{
290 struct zram *zram = dev_to_zram(dev);
291 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
292 int index;
293 char mode_buf[8];
294 ssize_t sz;
295
296 sz = strscpy(mode_buf, buf, sizeof(mode_buf));
297 if (sz <= 0)
298 return -EINVAL;
299
 300 /* ignore trailing newline */
301 if (mode_buf[sz - 1] == '\n')
302 mode_buf[sz - 1] = 0x00;
303
304 if (strcmp(mode_buf, "all"))
305 return -EINVAL;
306
307 down_read(&zram->init_lock);
308 if (!init_done(zram)) {
309 up_read(&zram->init_lock);
310 return -EINVAL;
311 }
312
313 for (index = 0; index < nr_pages; index++) {
314 /*
 315 * Do not mark ZRAM_UNDER_WB slots as ZRAM_IDLE, to close a race.
316 * See the comment in writeback_store.
317 */
e82592c4 318 zram_slot_lock(zram, index);
319 if (zram_allocated(zram, index) &&
320 !zram_test_flag(zram, index, ZRAM_UNDER_WB))
321 zram_set_flag(zram, index, ZRAM_IDLE);
322 zram_slot_unlock(zram, index);
323 }
324
325 up_read(&zram->init_lock);
326
327 return len;
328}
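
/*
 * Illustrative usage (device name assumed): mark every allocated slot
 * idle from user space so that a later "idle" writeback can pick it up:
 *
 *	echo all > /sys/block/zram0/idle
 */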
329
013bf95a 330#ifdef CONFIG_ZRAM_WRITEBACK
331static ssize_t writeback_limit_enable_store(struct device *dev,
332 struct device_attribute *attr, const char *buf, size_t len)
333{
334 struct zram *zram = dev_to_zram(dev);
335 u64 val;
336 ssize_t ret = -EINVAL;
337
338 if (kstrtoull(buf, 10, &val))
339 return ret;
340
341 down_read(&zram->init_lock);
342 spin_lock(&zram->wb_limit_lock);
343 zram->wb_limit_enable = val;
344 spin_unlock(&zram->wb_limit_lock);
345 up_read(&zram->init_lock);
346 ret = len;
347
348 return ret;
349}
350
351static ssize_t writeback_limit_enable_show(struct device *dev,
352 struct device_attribute *attr, char *buf)
353{
354 bool val;
355 struct zram *zram = dev_to_zram(dev);
356
357 down_read(&zram->init_lock);
358 spin_lock(&zram->wb_limit_lock);
359 val = zram->wb_limit_enable;
360 spin_unlock(&zram->wb_limit_lock);
361 up_read(&zram->init_lock);
362
363 return scnprintf(buf, PAGE_SIZE, "%d\n", val);
364}
365
366static ssize_t writeback_limit_store(struct device *dev,
367 struct device_attribute *attr, const char *buf, size_t len)
368{
369 struct zram *zram = dev_to_zram(dev);
370 u64 val;
371 ssize_t ret = -EINVAL;
372
373 if (kstrtoull(buf, 10, &val))
374 return ret;
375
376 down_read(&zram->init_lock);
377 spin_lock(&zram->wb_limit_lock);
378 zram->bd_wb_limit = val;
379 spin_unlock(&zram->wb_limit_lock);
380 up_read(&zram->init_lock);
381 ret = len;
382
383 return ret;
384}
385
386static ssize_t writeback_limit_show(struct device *dev,
387 struct device_attribute *attr, char *buf)
388{
389 u64 val;
390 struct zram *zram = dev_to_zram(dev);
391
392 down_read(&zram->init_lock);
393 spin_lock(&zram->wb_limit_lock);
394 val = zram->bd_wb_limit;
395 spin_unlock(&zram->wb_limit_lock);
396 up_read(&zram->init_lock);
397
398 return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
399}
400
401static void reset_bdev(struct zram *zram)
402{
403 struct block_device *bdev;
404
7e529283 405 if (!zram->backing_dev)
406 return;
407
408 bdev = zram->bdev;
409 if (zram->old_block_size)
410 set_blocksize(bdev, zram->old_block_size);
411 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
 412 /* hope filp_close flushes all of the IO */
413 filp_close(zram->backing_dev, NULL);
414 zram->backing_dev = NULL;
415 zram->old_block_size = 0;
416 zram->bdev = NULL;
417 zram->disk->queue->backing_dev_info->capabilities |=
418 BDI_CAP_SYNCHRONOUS_IO;
419 kvfree(zram->bitmap);
420 zram->bitmap = NULL;
421}
422
423static ssize_t backing_dev_show(struct device *dev,
424 struct device_attribute *attr, char *buf)
425{
426 struct zram *zram = dev_to_zram(dev);
427 struct file *file = zram->backing_dev;
428 char *p;
429 ssize_t ret;
430
431 down_read(&zram->init_lock);
7e529283 432 if (!zram->backing_dev) {
433 memcpy(buf, "none\n", 5);
434 up_read(&zram->init_lock);
435 return 5;
436 }
437
438 p = file_path(file, buf, PAGE_SIZE - 1);
439 if (IS_ERR(p)) {
440 ret = PTR_ERR(p);
441 goto out;
442 }
443
444 ret = strlen(p);
445 memmove(buf, p, ret);
446 buf[ret++] = '\n';
447out:
448 up_read(&zram->init_lock);
449 return ret;
450}
451
452static ssize_t backing_dev_store(struct device *dev,
453 struct device_attribute *attr, const char *buf, size_t len)
454{
455 char *file_name;
c8bd134a 456 size_t sz;
457 struct file *backing_dev = NULL;
458 struct inode *inode;
459 struct address_space *mapping;
460 unsigned int bitmap_sz, old_block_size = 0;
461 unsigned long nr_pages, *bitmap = NULL;
462 struct block_device *bdev = NULL;
463 int err;
464 struct zram *zram = dev_to_zram(dev);
465
466 file_name = kmalloc(PATH_MAX, GFP_KERNEL);
467 if (!file_name)
468 return -ENOMEM;
469
470 down_write(&zram->init_lock);
471 if (init_done(zram)) {
472 pr_info("Can't setup backing device for initialized device\n");
473 err = -EBUSY;
474 goto out;
475 }
476
477 strlcpy(file_name, buf, PATH_MAX);
478 /* ignore trailing newline */
479 sz = strlen(file_name);
480 if (sz > 0 && file_name[sz - 1] == '\n')
481 file_name[sz - 1] = 0x00;
482
483 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
484 if (IS_ERR(backing_dev)) {
485 err = PTR_ERR(backing_dev);
486 backing_dev = NULL;
487 goto out;
488 }
489
490 mapping = backing_dev->f_mapping;
491 inode = mapping->host;
492
 493 /* Only block devices are supported at the moment */
494 if (!S_ISBLK(inode->i_mode)) {
495 err = -ENOTBLK;
496 goto out;
497 }
498
499 bdev = bdgrab(I_BDEV(inode));
500 err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
501 if (err < 0) {
502 bdev = NULL;
013bf95a 503 goto out;
5547932d 504 }
013bf95a 505
506 nr_pages = i_size_read(inode) >> PAGE_SHIFT;
507 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
508 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
509 if (!bitmap) {
510 err = -ENOMEM;
511 goto out;
512 }
513
514 old_block_size = block_size(bdev);
515 err = set_blocksize(bdev, PAGE_SIZE);
516 if (err)
517 goto out;
518
519 reset_bdev(zram);
520
521 zram->old_block_size = old_block_size;
522 zram->bdev = bdev;
523 zram->backing_dev = backing_dev;
524 zram->bitmap = bitmap;
525 zram->nr_pages = nr_pages;
526 /*
 527 * With the writeback feature, zram does asynchronous IO, so it is no
 528 * longer a synchronous device; clear the synchronous io flag. Otherwise,
 529 * the upper layer (e.g., swap) could wait for IO completion rather than
 530 * submit-and-return, which would make the system sluggish.
 531 * Furthermore, when the IO function returns (e.g., swap_readpage), the
 532 * upper layer assumes the IO is done and may free the page while the
 533 * IO is actually still in flight, eventually causing a use-after-free
 534 * when the IO really completes.
535 */
536 zram->disk->queue->backing_dev_info->capabilities &=
537 ~BDI_CAP_SYNCHRONOUS_IO;
538 up_write(&zram->init_lock);
539
540 pr_info("setup backing device %s\n", file_name);
541 kfree(file_name);
542
543 return len;
544out:
545 if (bitmap)
546 kvfree(bitmap);
547
548 if (bdev)
549 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
550
551 if (backing_dev)
552 filp_close(backing_dev, NULL);
553
554 up_write(&zram->init_lock);
555
556 kfree(file_name);
557
558 return err;
559}
560
7e529283 561static unsigned long alloc_block_bdev(struct zram *zram)
1363d466 562{
563 unsigned long blk_idx = 1;
564retry:
1363d466 565 /* skip bit 0 so a block index is never confused with zram.handle == 0 */
566 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
567 if (blk_idx == zram->nr_pages)
1363d466 568 return 0;
1363d466 569
570 if (test_and_set_bit(blk_idx, zram->bitmap))
571 goto retry;
1363d466 572
23eddf39 573 atomic64_inc(&zram->stats.bd_count);
3c9959e0 574 return blk_idx;
575}
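
/*
 * alloc_block_bdev() hands out page-sized slots of the backing device
 * via the zram->bitmap allocator. Bit 0 is intentionally never used, so
 * a stored element of 0 always means "no backing block", and callers
 * treat a 0 return as the backing device being full.
 */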
576
7e529283 577static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
578{
579 int was_set;
580
7e529283 581 was_set = test_and_clear_bit(blk_idx, zram->bitmap);
1363d466 582 WARN_ON_ONCE(!was_set);
23eddf39 583 atomic64_dec(&zram->stats.bd_count);
584}
585
384bc41f 586static void zram_page_end_io(struct bio *bio)
db8ffbd4 587{
263663cd 588 struct page *page = bio_first_page_all(bio);
589
590 page_endio(page, op_is_write(bio_op(bio)),
591 blk_status_to_errno(bio->bi_status));
592 bio_put(bio);
593}
594
595/*
596 * Returns 1 if the submission is successful.
597 */
598static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
599 unsigned long entry, struct bio *parent)
600{
601 struct bio *bio;
602
603 bio = bio_alloc(GFP_ATOMIC, 1);
604 if (!bio)
605 return -ENOMEM;
606
607 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
a0725ab0 608 bio_set_dev(bio, zram->bdev);
609 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
610 bio_put(bio);
611 return -EIO;
612 }
613
614 if (!parent) {
615 bio->bi_opf = REQ_OP_READ;
616 bio->bi_end_io = zram_page_end_io;
617 } else {
618 bio->bi_opf = parent->bi_opf;
619 bio_chain(bio, parent);
620 }
621
622 submit_bio(bio);
623 return 1;
624}
625
626#define HUGE_WRITEBACK 1
627#define IDLE_WRITEBACK 2
628
629static ssize_t writeback_store(struct device *dev,
630 struct device_attribute *attr, const char *buf, size_t len)
631{
632 struct zram *zram = dev_to_zram(dev);
633 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
634 unsigned long index;
635 struct bio bio;
636 struct bio_vec bio_vec;
637 struct page *page;
638 ssize_t ret, sz;
639 char mode_buf[8];
1d69a3f8 640 int mode = -1;
641 unsigned long blk_idx = 0;
642
643 sz = strscpy(mode_buf, buf, sizeof(mode_buf));
644 if (sz <= 0)
645 return -EINVAL;
646
647 /* ignore trailing newline */
648 if (mode_buf[sz - 1] == '\n')
649 mode_buf[sz - 1] = 0x00;
650
651 if (!strcmp(mode_buf, "idle"))
652 mode = IDLE_WRITEBACK;
653 else if (!strcmp(mode_buf, "huge"))
654 mode = HUGE_WRITEBACK;
655
1d69a3f8 656 if (mode == -1)
657 return -EINVAL;
658
659 down_read(&zram->init_lock);
660 if (!init_done(zram)) {
661 ret = -EINVAL;
662 goto release_init_lock;
663 }
664
665 if (!zram->backing_dev) {
666 ret = -ENODEV;
667 goto release_init_lock;
668 }
669
670 page = alloc_page(GFP_KERNEL);
671 if (!page) {
672 ret = -ENOMEM;
673 goto release_init_lock;
674 }
675
676 for (index = 0; index < nr_pages; index++) {
677 struct bio_vec bvec;
678
679 bvec.bv_page = page;
680 bvec.bv_len = PAGE_SIZE;
681 bvec.bv_offset = 0;
682
683 spin_lock(&zram->wb_limit_lock);
684 if (zram->wb_limit_enable && !zram->bd_wb_limit) {
685 spin_unlock(&zram->wb_limit_lock);
686 ret = -EIO;
687 break;
688 }
1d69a3f8 689 spin_unlock(&zram->wb_limit_lock);
bb416d18 690
691 if (!blk_idx) {
692 blk_idx = alloc_block_bdev(zram);
693 if (!blk_idx) {
694 ret = -ENOSPC;
695 break;
696 }
697 }
698
699 zram_slot_lock(zram, index);
700 if (!zram_allocated(zram, index))
701 goto next;
702
703 if (zram_test_flag(zram, index, ZRAM_WB) ||
704 zram_test_flag(zram, index, ZRAM_SAME) ||
705 zram_test_flag(zram, index, ZRAM_UNDER_WB))
706 goto next;
707
708 if (mode == IDLE_WRITEBACK &&
709 !zram_test_flag(zram, index, ZRAM_IDLE))
710 goto next;
711 if (mode == HUGE_WRITEBACK &&
712 !zram_test_flag(zram, index, ZRAM_HUGE))
713 goto next;
714 /*
 715 * Clearing ZRAM_UNDER_WB is the duty of the caller.
 716 * IOW, zram_free_page never clears it.
717 */
718 zram_set_flag(zram, index, ZRAM_UNDER_WB);
719 /* Need for hugepage writeback racing */
720 zram_set_flag(zram, index, ZRAM_IDLE);
721 zram_slot_unlock(zram, index);
722 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
723 zram_slot_lock(zram, index);
724 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
725 zram_clear_flag(zram, index, ZRAM_IDLE);
726 zram_slot_unlock(zram, index);
727 continue;
728 }
729
730 bio_init(&bio, &bio_vec, 1);
731 bio_set_dev(&bio, zram->bdev);
732 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
733 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC;
734
735 bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
736 bvec.bv_offset);
737 /*
 738 * XXX: a single-page IO would be inefficient for writes,
 739 * but it is not bad as a starting point.
740 */
741 ret = submit_bio_wait(&bio);
742 if (ret) {
743 zram_slot_lock(zram, index);
744 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
745 zram_clear_flag(zram, index, ZRAM_IDLE);
746 zram_slot_unlock(zram, index);
747 continue;
748 }
749
23eddf39 750 atomic64_inc(&zram->stats.bd_writes);
751 /*
 752 * We released zram_slot_lock, so we need to check whether the slot
 753 * changed in the meantime. If the slot was freed, we can catch it
 754 * easily via zram_allocated.
 755 * A subtle case is the slot being freed, reallocated and marked
 756 * ZRAM_IDLE again. To close that race, idle_store does not mark a
 757 * slot ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
 758 * Thus, checking the ZRAM_IDLE bit here closes the race.
759 */
760 zram_slot_lock(zram, index);
761 if (!zram_allocated(zram, index) ||
762 !zram_test_flag(zram, index, ZRAM_IDLE)) {
763 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
764 zram_clear_flag(zram, index, ZRAM_IDLE);
765 goto next;
766 }
767
768 zram_free_page(zram, index);
769 zram_clear_flag(zram, index, ZRAM_UNDER_WB);
770 zram_set_flag(zram, index, ZRAM_WB);
771 zram_set_element(zram, index, blk_idx);
772 blk_idx = 0;
773 atomic64_inc(&zram->stats.pages_stored);
774 spin_lock(&zram->wb_limit_lock);
775 if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
776 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
777 spin_unlock(&zram->wb_limit_lock);
778next:
779 zram_slot_unlock(zram, index);
780 }
781
782 if (blk_idx)
783 free_block_bdev(zram, blk_idx);
784 ret = len;
785 __free_page(page);
786release_init_lock:
787 up_read(&zram->init_lock);
788
789 return ret;
790}
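
/*
 * Illustrative usage (device name assumed): write back idle or huge
 * (incompressible) pages to the configured backing_dev:
 *
 *	echo idle > /sys/block/zram0/writeback
 *	echo huge > /sys/block/zram0/writeback
 */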
791
792struct zram_work {
793 struct work_struct work;
794 struct zram *zram;
795 unsigned long entry;
796 struct bio *bio;
797};
798
799#if PAGE_SIZE != 4096
800static void zram_sync_read(struct work_struct *work)
801{
802 struct bio_vec bvec;
803 struct zram_work *zw = container_of(work, struct zram_work, work);
804 struct zram *zram = zw->zram;
805 unsigned long entry = zw->entry;
806 struct bio *bio = zw->bio;
807
808 read_from_bdev_async(zram, &bvec, entry, bio);
809}
810
811/*
 812 * The block layer wants only one ->make_request_fn to be active at a
 813 * time, so chaining this IO to the parent IO in the same context would
 814 * deadlock. To avoid that, the read is issued from a worker thread.
815 */
816static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
817 unsigned long entry, struct bio *bio)
818{
819 struct zram_work work;
820
821 work.zram = zram;
822 work.entry = entry;
823 work.bio = bio;
824
825 INIT_WORK_ONSTACK(&work.work, zram_sync_read);
826 queue_work(system_unbound_wq, &work.work);
827 flush_work(&work.work);
828 destroy_work_on_stack(&work.work);
829
830 return 1;
831}
832#else
833static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
834 unsigned long entry, struct bio *bio)
835{
836 WARN_ON(1);
837 return -EIO;
838}
839#endif
840
841static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
842 unsigned long entry, struct bio *parent, bool sync)
843{
23eddf39 844 atomic64_inc(&zram->stats.bd_reads);
845 if (sync)
846 return read_from_bdev_sync(zram, bvec, entry, parent);
847 else
848 return read_from_bdev_async(zram, bvec, entry, parent);
849}
013bf95a 850#else
013bf95a 851static inline void reset_bdev(struct zram *zram) {};
852static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
853 unsigned long entry, struct bio *parent, bool sync)
854{
855 return -EIO;
856}
857
858static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
859#endif
860
861#ifdef CONFIG_ZRAM_MEMORY_TRACKING
862
863static struct dentry *zram_debugfs_root;
864
865static void zram_debugfs_create(void)
866{
867 zram_debugfs_root = debugfs_create_dir("zram", NULL);
868}
869
870static void zram_debugfs_destroy(void)
871{
872 debugfs_remove_recursive(zram_debugfs_root);
873}
874
875static void zram_accessed(struct zram *zram, u32 index)
876{
e82592c4 877 zram_clear_flag(zram, index, ZRAM_IDLE);
878 zram->table[index].ac_time = ktime_get_boottime();
879}
880
881static ssize_t read_block_state(struct file *file, char __user *buf,
882 size_t count, loff_t *ppos)
883{
884 char *kbuf;
885 ssize_t index, written = 0;
886 struct zram *zram = file->private_data;
887 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
888 struct timespec64 ts;
889
890 kbuf = kvmalloc(count, GFP_KERNEL);
891 if (!kbuf)
892 return -ENOMEM;
893
894 down_read(&zram->init_lock);
895 if (!init_done(zram)) {
896 up_read(&zram->init_lock);
897 kvfree(kbuf);
898 return -EINVAL;
899 }
900
901 for (index = *ppos; index < nr_pages; index++) {
902 int copied;
903
904 zram_slot_lock(zram, index);
905 if (!zram_allocated(zram, index))
906 goto next;
907
908 ts = ktime_to_timespec64(zram->table[index].ac_time);
909 copied = snprintf(kbuf + written, count,
e82592c4 910 "%12zd %12lld.%06lu %c%c%c%c\n",
911 index, (s64)ts.tv_sec,
912 ts.tv_nsec / NSEC_PER_USEC,
913 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
914 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
915 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
916 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
917
918 if (count < copied) {
919 zram_slot_unlock(zram, index);
920 break;
921 }
922 written += copied;
923 count -= copied;
924next:
925 zram_slot_unlock(zram, index);
926 *ppos += 1;
927 }
928
929 up_read(&zram->init_lock);
930 if (copy_to_user(buf, kbuf, written))
931 written = -EFAULT;
932 kvfree(kbuf);
933
934 return written;
935}
936
937static const struct file_operations proc_zram_block_state_op = {
938 .open = simple_open,
939 .read = read_block_state,
940 .llseek = default_llseek,
941};
942
943static void zram_debugfs_register(struct zram *zram)
944{
945 if (!zram_debugfs_root)
946 return;
947
948 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
949 zram_debugfs_root);
950 debugfs_create_file("block_state", 0400, zram->debugfs_dir,
951 zram, &proc_zram_block_state_op);
952}
953
954static void zram_debugfs_unregister(struct zram *zram)
955{
956 debugfs_remove_recursive(zram->debugfs_dir);
957}
958#else
959static void zram_debugfs_create(void) {};
960static void zram_debugfs_destroy(void) {};
961static void zram_accessed(struct zram *zram, u32 index)
962{
963 zram_clear_flag(zram, index, ZRAM_IDLE);
964};
965static void zram_debugfs_register(struct zram *zram) {};
966static void zram_debugfs_unregister(struct zram *zram) {};
967#endif
013bf95a 968
969/*
970 * We switched to per-cpu streams and this attr is not needed anymore.
971 * However, we will keep it around for some time, because:
972 * a) we may revert per-cpu streams in the future
 973 * b) it's visible to user space and we need to follow our 2-year
 974 * retirement rule; but we already have a number of 'soon to be
 975 * altered' attrs, so max_comp_streams needs to wait for the next
976 * layoff cycle.
977 */
978static ssize_t max_comp_streams_show(struct device *dev,
979 struct device_attribute *attr, char *buf)
980{
43209ea2 981 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
982}
983
984static ssize_t max_comp_streams_store(struct device *dev,
985 struct device_attribute *attr, const char *buf, size_t len)
986{
43209ea2 987 return len;
988}
989
990static ssize_t comp_algorithm_show(struct device *dev,
991 struct device_attribute *attr, char *buf)
992{
993 size_t sz;
994 struct zram *zram = dev_to_zram(dev);
995
996 down_read(&zram->init_lock);
997 sz = zcomp_available_show(zram->compressor, buf);
998 up_read(&zram->init_lock);
999
1000 return sz;
1001}
1002
1003static ssize_t comp_algorithm_store(struct device *dev,
1004 struct device_attribute *attr, const char *buf, size_t len)
1005{
1006 struct zram *zram = dev_to_zram(dev);
f357e345 1007 char compressor[ARRAY_SIZE(zram->compressor)];
1008 size_t sz;
1009
1010 strlcpy(compressor, buf, sizeof(compressor));
1011 /* ignore trailing newline */
1012 sz = strlen(compressor);
1013 if (sz > 0 && compressor[sz - 1] == '\n')
1014 compressor[sz - 1] = 0x00;
1015
1016 if (!zcomp_available_algorithm(compressor))
1017 return -EINVAL;
1018
1019 down_write(&zram->init_lock);
1020 if (init_done(zram)) {
1021 up_write(&zram->init_lock);
1022 pr_info("Can't change algorithm for initialized device\n");
1023 return -EBUSY;
1024 }
4bbacd51 1025
f357e345 1026 strcpy(zram->compressor, compressor);
1027 up_write(&zram->init_lock);
1028 return len;
1029}
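
/*
 * Illustrative usage (device name assumed): the algorithm can only be
 * changed before the disksize is set and must be known to zcomp, e.g.
 * the "lzo" default above:
 *
 *	echo lzo > /sys/block/zram0/comp_algorithm
 */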
1030
1031static ssize_t compact_store(struct device *dev,
1032 struct device_attribute *attr, const char *buf, size_t len)
306b0c95 1033{
522698d7 1034 struct zram *zram = dev_to_zram(dev);
306b0c95 1035
1036 down_read(&zram->init_lock);
1037 if (!init_done(zram)) {
1038 up_read(&zram->init_lock);
1039 return -EINVAL;
1040 }
306b0c95 1041
beb6602c 1042 zs_compact(zram->mem_pool);
522698d7 1043 up_read(&zram->init_lock);
d2d5e762 1044
522698d7 1045 return len;
1046}
1047
1048static ssize_t io_stat_show(struct device *dev,
1049 struct device_attribute *attr, char *buf)
d2d5e762 1050{
1051 struct zram *zram = dev_to_zram(dev);
1052 ssize_t ret;
d2d5e762 1053
1054 down_read(&zram->init_lock);
1055 ret = scnprintf(buf, PAGE_SIZE,
1056 "%8llu %8llu %8llu %8llu\n",
1057 (u64)atomic64_read(&zram->stats.failed_reads),
1058 (u64)atomic64_read(&zram->stats.failed_writes),
1059 (u64)atomic64_read(&zram->stats.invalid_io),
1060 (u64)atomic64_read(&zram->stats.notify_free));
1061 up_read(&zram->init_lock);
306b0c95 1062
522698d7 1063 return ret;
1064}
1065
1066static ssize_t mm_stat_show(struct device *dev,
1067 struct device_attribute *attr, char *buf)
9b3bb7ab 1068{
522698d7 1069 struct zram *zram = dev_to_zram(dev);
7d3f3938 1070 struct zs_pool_stats pool_stats;
1071 u64 orig_size, mem_used = 0;
1072 long max_used;
1073 ssize_t ret;
a539c72a 1074
1075 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
1076
522698d7 1077 down_read(&zram->init_lock);
7d3f3938 1078 if (init_done(zram)) {
1079 mem_used = zs_get_total_pages(zram->mem_pool);
1080 zs_pool_stats(zram->mem_pool, &pool_stats);
7d3f3938 1081 }
9b3bb7ab 1082
1083 orig_size = atomic64_read(&zram->stats.pages_stored);
1084 max_used = atomic_long_read(&zram->stats.max_used_pages);
9b3bb7ab 1085
522698d7 1086 ret = scnprintf(buf, PAGE_SIZE,
89e85bce 1087 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
1088 orig_size << PAGE_SHIFT,
1089 (u64)atomic64_read(&zram->stats.compr_data_size),
1090 mem_used << PAGE_SHIFT,
1091 zram->limit_pages << PAGE_SHIFT,
1092 max_used << PAGE_SHIFT,
8e19d540 1093 (u64)atomic64_read(&zram->stats.same_pages),
1094 pool_stats.pages_compacted,
1095 (u64)atomic64_read(&zram->stats.huge_pages));
522698d7 1096 up_read(&zram->init_lock);
9b3bb7ab 1097
1098 return ret;
1099}
1100
23eddf39 1101#ifdef CONFIG_ZRAM_WRITEBACK
bb416d18 1102#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
1103static ssize_t bd_stat_show(struct device *dev,
1104 struct device_attribute *attr, char *buf)
1105{
1106 struct zram *zram = dev_to_zram(dev);
1107 ssize_t ret;
1108
1109 down_read(&zram->init_lock);
1110 ret = scnprintf(buf, PAGE_SIZE,
1111 "%8llu %8llu %8llu\n",
1112 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
1113 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
1114 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
1115 up_read(&zram->init_lock);
1116
1117 return ret;
1118}
1119#endif
1120
1121static ssize_t debug_stat_show(struct device *dev,
1122 struct device_attribute *attr, char *buf)
1123{
1124 int version = 1;
1125 struct zram *zram = dev_to_zram(dev);
1126 ssize_t ret;
1127
1128 down_read(&zram->init_lock);
1129 ret = scnprintf(buf, PAGE_SIZE,
3c9959e0 1130 "version: %d\n%8llu %8llu\n",
623e47fc 1131 version,
1132 (u64)atomic64_read(&zram->stats.writestall),
1133 (u64)atomic64_read(&zram->stats.miss_free));
1134 up_read(&zram->init_lock);
1135
1136 return ret;
1137}
1138
1139static DEVICE_ATTR_RO(io_stat);
1140static DEVICE_ATTR_RO(mm_stat);
1141#ifdef CONFIG_ZRAM_WRITEBACK
1142static DEVICE_ATTR_RO(bd_stat);
1143#endif
623e47fc 1144static DEVICE_ATTR_RO(debug_stat);
522698d7 1145
beb6602c 1146static void zram_meta_free(struct zram *zram, u64 disksize)
1147{
1148 size_t num_pages = disksize >> PAGE_SHIFT;
1149 size_t index;
1150
1151 /* Free all pages that are still in this zram device */
1152 for (index = 0; index < num_pages; index++)
1153 zram_free_page(zram, index);
1fec1172 1154
1155 zs_destroy_pool(zram->mem_pool);
1156 vfree(zram->table);
1157}
1158
beb6602c 1159static bool zram_meta_alloc(struct zram *zram, u64 disksize)
1160{
1161 size_t num_pages;
9b3bb7ab 1162
9b3bb7ab 1163 num_pages = disksize >> PAGE_SHIFT;
fad953ce 1164 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
1165 if (!zram->table)
1166 return false;
9b3bb7ab 1167
1168 zram->mem_pool = zs_create_pool(zram->disk->disk_name);
1169 if (!zram->mem_pool) {
1170 vfree(zram->table);
1171 return false;
1172 }
1173
1174 if (!huge_class_size)
1175 huge_class_size = zs_huge_class_size(zram->mem_pool);
beb6602c 1176 return true;
1177}
1178
1179/*
1180 * To protect concurrent access to the same index entry,
 1181 * the caller should hold this table index entry's bit_spinlock to
 1182 * indicate this index entry is being accessed.
1183 */
f1e3cfff 1184static void zram_free_page(struct zram *zram, size_t index)
306b0c95 1185{
1186 unsigned long handle;
1187
1188#ifdef CONFIG_ZRAM_MEMORY_TRACKING
1189 zram->table[index].ac_time = 0;
1190#endif
1191 if (zram_test_flag(zram, index, ZRAM_IDLE))
1192 zram_clear_flag(zram, index, ZRAM_IDLE);
1193
1194 if (zram_test_flag(zram, index, ZRAM_HUGE)) {
1195 zram_clear_flag(zram, index, ZRAM_HUGE);
1196 atomic64_dec(&zram->stats.huge_pages);
1197 }
1198
1199 if (zram_test_flag(zram, index, ZRAM_WB)) {
1200 zram_clear_flag(zram, index, ZRAM_WB);
1201 free_block_bdev(zram, zram_get_element(zram, index));
1202 goto out;
db8ffbd4 1203 }
306b0c95 1204
8e19d540 1205 /*
 1206 * No memory is allocated for same-element-filled pages.
 1207 * Simply clear the ZRAM_SAME flag.
1208 */
1209 if (zram_test_flag(zram, index, ZRAM_SAME)) {
1210 zram_clear_flag(zram, index, ZRAM_SAME);
8e19d540 1211 atomic64_dec(&zram->stats.same_pages);
7e529283 1212 goto out;
1213 }
1214
db8ffbd4 1215 handle = zram_get_handle(zram, index);
8e19d540 1216 if (!handle)
1217 return;
1218
beb6602c 1219 zs_free(zram->mem_pool, handle);
306b0c95 1220
beb6602c 1221 atomic64_sub(zram_get_obj_size(zram, index),
d2d5e762 1222 &zram->stats.compr_data_size);
7e529283 1223out:
90a7806e 1224 atomic64_dec(&zram->stats.pages_stored);
643ae61d 1225 zram_set_handle(zram, index, 0);
beb6602c 1226 zram_set_obj_size(zram, index, 0);
1227 WARN_ON_ONCE(zram->table[index].flags &
1228 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
1229}
1230
1231static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
1232 struct bio *bio, bool partial_io)
306b0c95 1233{
1f7319c7 1234 int ret;
92967471 1235 unsigned long handle;
ebaf9ab5 1236 unsigned int size;
1f7319c7 1237 void *src, *dst;
1f7319c7 1238
1239 zram_slot_lock(zram, index);
1240 if (zram_test_flag(zram, index, ZRAM_WB)) {
1241 struct bio_vec bvec;
8e654f8f 1242
8e654f8f 1243 zram_slot_unlock(zram, index);
1244
1245 bvec.bv_page = page;
1246 bvec.bv_len = PAGE_SIZE;
1247 bvec.bv_offset = 0;
1248 return read_from_bdev(zram, &bvec,
1249 zram_get_element(zram, index),
1250 bio, partial_io);
1251 }
1252
643ae61d 1253 handle = zram_get_handle(zram, index);
1254 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1255 unsigned long value;
1256 void *mem;
1257
1258 value = handle ? zram_get_element(zram, index) : 0;
1259 mem = kmap_atomic(page);
1260 zram_fill_page(mem, PAGE_SIZE, value);
1261 kunmap_atomic(mem);
1262 zram_slot_unlock(zram, index);
1263 return 0;
1264 }
1265
beb6602c 1266 size = zram_get_obj_size(zram, index);
306b0c95 1267
beb6602c 1268 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
ebaf9ab5 1269 if (size == PAGE_SIZE) {
1270 dst = kmap_atomic(page);
1271 memcpy(dst, src, PAGE_SIZE);
1272 kunmap_atomic(dst);
1273 ret = 0;
1274 } else {
1275 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1276
1277 dst = kmap_atomic(page);
1278 ret = zcomp_decompress(zstrm, src, size, dst);
1279 kunmap_atomic(dst);
1280 zcomp_stream_put(zram->comp);
1281 }
beb6602c 1282 zs_unmap_object(zram->mem_pool, handle);
86c49814 1283 zram_slot_unlock(zram, index);
a1dd52af 1284
8c921b2b 1285 /* Should NEVER happen. Return bio error if it does. */
1f7319c7 1286 if (unlikely(ret))
8c921b2b 1287 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
306b0c95 1288
1f7319c7 1289 return ret;
1290}
1291
37b51fdd 1292static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
8e654f8f 1293 u32 index, int offset, struct bio *bio)
1294{
1295 int ret;
37b51fdd 1296 struct page *page;
37b51fdd 1297
1298 page = bvec->bv_page;
1299 if (is_partial_io(bvec)) {
1300 /* Use a temporary buffer to decompress the page */
1301 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1302 if (!page)
1303 return -ENOMEM;
1304 }
1305
8e654f8f 1306 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1307 if (unlikely(ret))
1308 goto out;
7e5a5104 1309
1310 if (is_partial_io(bvec)) {
1311 void *dst = kmap_atomic(bvec->bv_page);
1312 void *src = kmap_atomic(page);
37b51fdd 1313
1314 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1315 kunmap_atomic(src);
1316 kunmap_atomic(dst);
37b51fdd 1317 }
1f7319c7 1318out:
37b51fdd 1319 if (is_partial_io(bvec))
1f7319c7 1320 __free_page(page);
37b51fdd 1321
37b51fdd 1322 return ret;
1323}
1324
1325static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1326 u32 index, struct bio *bio)
306b0c95 1327{
ae85a807 1328 int ret = 0;
1f7319c7 1329 unsigned long alloced_pages;
da9556a2 1330 unsigned long handle = 0;
1331 unsigned int comp_len = 0;
1332 void *src, *dst, *mem;
1333 struct zcomp_strm *zstrm;
1334 struct page *page = bvec->bv_page;
1335 unsigned long element = 0;
1336 enum zram_pageflags flags = 0;
1337
1338 mem = kmap_atomic(page);
1339 if (page_same_filled(mem, &element)) {
1340 kunmap_atomic(mem);
1341 /* Free memory associated with this sector now. */
1342 flags = ZRAM_SAME;
1343 atomic64_inc(&zram->stats.same_pages);
1344 goto out;
1345 }
1346 kunmap_atomic(mem);
924bd88d 1347
da9556a2 1348compress_again:
97ec7c8b 1349 zstrm = zcomp_stream_get(zram->comp);
1f7319c7 1350 src = kmap_atomic(page);
97ec7c8b 1351 ret = zcomp_compress(zstrm, src, &comp_len);
1f7319c7 1352 kunmap_atomic(src);
306b0c95 1353
b7ca232e 1354 if (unlikely(ret)) {
97ec7c8b 1355 zcomp_stream_put(zram->comp);
8c921b2b 1356 pr_err("Compression failed! err=%d\n", ret);
97ec7c8b 1357 zs_free(zram->mem_pool, handle);
1f7319c7 1358 return ret;
8c921b2b 1359 }
da9556a2 1360
a939888e 1361 if (comp_len >= huge_class_size)
89e85bce 1362 comp_len = PAGE_SIZE;
1363 /*
1364 * handle allocation has 2 paths:
1365 * a) fast path is executed with preemption disabled (for
1366 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1367 * since we can't sleep;
1368 * b) slow path enables preemption and attempts to allocate
1369 * the page with __GFP_DIRECT_RECLAIM bit set. we have to
1370 * put per-cpu compression stream and, thus, to re-do
1371 * the compression once handle is allocated.
1372 *
1373 * if we have a 'non-null' handle here then we are coming
1374 * from the slow path and handle has already been allocated.
1375 */
1376 if (!handle)
beb6602c 1377 handle = zs_malloc(zram->mem_pool, comp_len,
1378 __GFP_KSWAPD_RECLAIM |
1379 __GFP_NOWARN |
1380 __GFP_HIGHMEM |
1381 __GFP_MOVABLE);
fd1a30de 1382 if (!handle) {
2aea8493 1383 zcomp_stream_put(zram->comp);
623e47fc 1384 atomic64_inc(&zram->stats.writestall);
beb6602c 1385 handle = zs_malloc(zram->mem_pool, comp_len,
1386 GFP_NOIO | __GFP_HIGHMEM |
1387 __GFP_MOVABLE);
1388 if (handle)
1389 goto compress_again;
1f7319c7 1390 return -ENOMEM;
8c921b2b 1391 }
9ada9da9 1392
beb6602c 1393 alloced_pages = zs_get_total_pages(zram->mem_pool);
1394 update_used_max(zram, alloced_pages);
1395
461a8eee 1396 if (zram->limit_pages && alloced_pages > zram->limit_pages) {
97ec7c8b 1397 zcomp_stream_put(zram->comp);
beb6602c 1398 zs_free(zram->mem_pool, handle);
1399 return -ENOMEM;
1400 }
1401
beb6602c 1402 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1403
1404 src = zstrm->buffer;
1405 if (comp_len == PAGE_SIZE)
397c6066 1406 src = kmap_atomic(page);
1407 memcpy(dst, src, comp_len);
1408 if (comp_len == PAGE_SIZE)
397c6066 1409 kunmap_atomic(src);
306b0c95 1410
2aea8493 1411 zcomp_stream_put(zram->comp);
beb6602c 1412 zs_unmap_object(zram->mem_pool, handle);
1413 atomic64_add(comp_len, &zram->stats.compr_data_size);
1414out:
1415 /*
1416 * Free memory associated with this sector
1417 * before overwriting unused sectors.
1418 */
86c49814 1419 zram_slot_lock(zram, index);
f40ac2ae 1420 zram_free_page(zram, index);
db8ffbd4 1421
1422 if (comp_len == PAGE_SIZE) {
1423 zram_set_flag(zram, index, ZRAM_HUGE);
1424 atomic64_inc(&zram->stats.huge_pages);
1425 }
1426
1427 if (flags) {
1428 zram_set_flag(zram, index, flags);
4ebbe7f7 1429 zram_set_element(zram, index, element);
db8ffbd4 1430 } else {
1431 zram_set_handle(zram, index, handle);
1432 zram_set_obj_size(zram, index, comp_len);
1433 }
86c49814 1434 zram_slot_unlock(zram, index);
306b0c95 1435
8c921b2b 1436 /* Update stats */
90a7806e 1437 atomic64_inc(&zram->stats.pages_stored);
ae85a807 1438 return ret;
1439}
1440
1441static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
db8ffbd4 1442 u32 index, int offset, struct bio *bio)
1443{
1444 int ret;
1445 struct page *page = NULL;
1446 void *src;
1447 struct bio_vec vec;
1448
1449 vec = *bvec;
1450 if (is_partial_io(bvec)) {
1451 void *dst;
1452 /*
1453 * This is a partial IO. We need to read the full page
 1454 * before writing the changes.
1455 */
1456 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1457 if (!page)
1458 return -ENOMEM;
1459
8e654f8f 1460 ret = __zram_bvec_read(zram, page, index, bio, true);
1461 if (ret)
1462 goto out;
1463
1464 src = kmap_atomic(bvec->bv_page);
1465 dst = kmap_atomic(page);
1466 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1467 kunmap_atomic(dst);
1468 kunmap_atomic(src);
1469
1470 vec.bv_page = page;
1471 vec.bv_len = PAGE_SIZE;
1472 vec.bv_offset = 0;
1473 }
1474
db8ffbd4 1475 ret = __zram_bvec_write(zram, &vec, index, bio);
924bd88d 1476out:
397c6066 1477 if (is_partial_io(bvec))
1f7319c7 1478 __free_page(page);
924bd88d 1479 return ret;
1480}
1481
1482/*
1483 * zram_bio_discard - handler on discard request
1484 * @index: physical block index in PAGE_SIZE units
1485 * @offset: byte offset within physical block
1486 */
1487static void zram_bio_discard(struct zram *zram, u32 index,
1488 int offset, struct bio *bio)
1489{
1490 size_t n = bio->bi_iter.bi_size;
1491
1492 /*
 1493 * zram manages data in physical block size units. Because the logical
 1494 * block size isn't identical to the physical block size on some
 1495 * architectures, we could get a discard request pointing to a specific
 1496 * offset within a certain physical block. Although we could handle this
 1497 * request by reading that physical block, decompressing, partially
 1498 * zeroing, re-compressing and then re-storing it, this isn't reasonable
1499 * because our intent with a discard request is to save memory. So
1500 * skipping this logical block is appropriate here.
1501 */
1502 if (offset) {
38515c73 1503 if (n <= (PAGE_SIZE - offset))
1504 return;
1505
38515c73 1506 n -= (PAGE_SIZE - offset);
1507 index++;
1508 }
1509
1510 while (n >= PAGE_SIZE) {
86c49814 1511 zram_slot_lock(zram, index);
f4659d8e 1512 zram_free_page(zram, index);
86c49814 1513 zram_slot_unlock(zram, index);
015254da 1514 atomic64_inc(&zram->stats.notify_free);
1515 index++;
1516 n -= PAGE_SIZE;
1517 }
1518}
1519
1520/*
 1521 * Returns a negative errno on error. Otherwise returns 0 or 1.
 1522 * Returns 0 if the IO request was completed synchronously.
 1523 * Returns 1 if the IO request was successfully submitted.
1524 */
522698d7 1525static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
3f289dcb 1526 int offset, unsigned int op, struct bio *bio)
9b3bb7ab 1527{
522698d7 1528 unsigned long start_time = jiffies;
d62e26b3 1529 struct request_queue *q = zram->disk->queue;
9b3bb7ab 1530 int ret;
9b3bb7ab 1531
ddcf35d3 1532 generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
522698d7 1533 &zram->disk->part0);
46a51c80 1534
3f289dcb 1535 if (!op_is_write(op)) {
522698d7 1536 atomic64_inc(&zram->stats.num_reads);
8e654f8f 1537 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1f7319c7 1538 flush_dcache_page(bvec->bv_page);
1539 } else {
1540 atomic64_inc(&zram->stats.num_writes);
db8ffbd4 1541 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1b672224 1542 }
9b3bb7ab 1543
ddcf35d3 1544 generic_end_io_acct(q, op, &zram->disk->part0, start_time);
9b3bb7ab 1545
1546 zram_slot_lock(zram, index);
1547 zram_accessed(zram, index);
1548 zram_slot_unlock(zram, index);
1549
ae85a807 1550 if (unlikely(ret < 0)) {
3f289dcb 1551 if (!op_is_write(op))
1552 atomic64_inc(&zram->stats.failed_reads);
1553 else
1554 atomic64_inc(&zram->stats.failed_writes);
1b672224 1555 }
9b3bb7ab 1556
1b672224 1557 return ret;
8c921b2b
JM
1558}
1559
be257c61 1560static void __zram_make_request(struct zram *zram, struct bio *bio)
8c921b2b 1561{
abf54548 1562 int offset;
8c921b2b 1563 u32 index;
1564 struct bio_vec bvec;
1565 struct bvec_iter iter;
8c921b2b 1566
1567 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1568 offset = (bio->bi_iter.bi_sector &
1569 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
8c921b2b 1570
1571 switch (bio_op(bio)) {
1572 case REQ_OP_DISCARD:
1573 case REQ_OP_WRITE_ZEROES:
f4659d8e 1574 zram_bio_discard(zram, index, offset, bio);
4246a0b6 1575 bio_endio(bio);
f4659d8e 1576 return;
1577 default:
1578 break;
1579 }
1580
7988613b 1581 bio_for_each_segment(bvec, bio, iter) {
1582 struct bio_vec bv = bvec;
1583 unsigned int unwritten = bvec.bv_len;
924bd88d 1584
1585 do {
1586 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1587 unwritten);
abf54548 1588 if (zram_bvec_rw(zram, &bv, index, offset,
3f289dcb 1589 bio_op(bio), bio) < 0)
1590 goto out;
1591
1592 bv.bv_offset += bv.bv_len;
1593 unwritten -= bv.bv_len;
924bd88d 1594
1595 update_position(&index, &offset, &bv);
1596 } while (unwritten);
a1dd52af 1597 }
306b0c95 1598
4246a0b6 1599 bio_endio(bio);
7d7854b4 1600 return;
1601
1602out:
306b0c95 1603 bio_io_error(bio);
1604}
1605
306b0c95 1606/*
f1e3cfff 1607 * Handler function for all zram I/O requests.
306b0c95 1608 */
dece1635 1609static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
306b0c95 1610{
f1e3cfff 1611 struct zram *zram = queue->queuedata;
306b0c95 1612
54850e73 1613 if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1614 bio->bi_iter.bi_size)) {
da5cc7d3 1615 atomic64_inc(&zram->stats.invalid_io);
a09759ac 1616 goto error;
1617 }
1618
be257c61 1619 __zram_make_request(zram, bio);
dece1635 1620 return BLK_QC_T_NONE;
a09759ac 1621
1622error:
1623 bio_io_error(bio);
dece1635 1624 return BLK_QC_T_NONE;
1625}
1626
1627static void zram_slot_free_notify(struct block_device *bdev,
1628 unsigned long index)
107c161b 1629{
f1e3cfff 1630 struct zram *zram;
107c161b 1631
f1e3cfff 1632 zram = bdev->bd_disk->private_data;
a0c516cb 1633
1634 atomic64_inc(&zram->stats.notify_free);
1635 if (!zram_slot_trylock(zram, index)) {
1636 atomic64_inc(&zram->stats.miss_free);
1637 return;
1638 }
1639
f614a9f4 1640 zram_free_page(zram, index);
86c49814 1641 zram_slot_unlock(zram, index);
1642}
1643
8c7f0102 1644static int zram_rw_page(struct block_device *bdev, sector_t sector,
3f289dcb 1645 struct page *page, unsigned int op)
8c7f0102 1646{
ae85a807 1647 int offset, ret;
8c7f0102 1648 u32 index;
1649 struct zram *zram;
1650 struct bio_vec bv;
1651
1652 if (PageTransHuge(page))
1653 return -ENOTSUPP;
8c7f0102 1654 zram = bdev->bd_disk->private_data;
08eee69f 1655
8c7f0102 1656 if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1657 atomic64_inc(&zram->stats.invalid_io);
ae85a807 1658 ret = -EINVAL;
a09759ac 1659 goto out;
8c7f0102 1660 }
1661
1662 index = sector >> SECTORS_PER_PAGE_SHIFT;
4ca82dab 1663 offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
8c7f0102 1664
1665 bv.bv_page = page;
1666 bv.bv_len = PAGE_SIZE;
1667 bv.bv_offset = 0;
1668
3f289dcb 1669 ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
08eee69f 1670out:
8c7f0102 1671 /*
 1672 * If I/O fails, just return an error (i.e., non-zero) without
 1673 * calling page_endio.
 1674 * The upper callers of rw_page (e.g., swap_readpage,
 1675 * __swap_writepage) will then resubmit the I/O as a bio request,
 1676 * and bio->bi_end_io will handle the error
 1677 * (e.g., SetPageError, set_page_dirty and extra work).
1678 */
1679 if (unlikely(ret < 0))
1680 return ret;
1681
1682 switch (ret) {
1683 case 0:
3f289dcb 1684 page_endio(page, op_is_write(op), 0);
1685 break;
1686 case 1:
1687 ret = 0;
1688 break;
1689 default:
1690 WARN_ON(1);
1691 }
1692 return ret;
8c7f0102 1693}
1694
1695static void zram_reset_device(struct zram *zram)
1696{
1697 struct zcomp *comp;
1698 u64 disksize;
306b0c95 1699
522698d7 1700 down_write(&zram->init_lock);
9b3bb7ab 1701
1702 zram->limit_pages = 0;
1703
1704 if (!init_done(zram)) {
1705 up_write(&zram->init_lock);
1706 return;
1707 }
1708
1709 comp = zram->comp;
1710 disksize = zram->disksize;
522698d7 1711 zram->disksize = 0;
1712
1713 set_capacity(zram->disk, 0);
1714 part_stat_set_all(&zram->disk->part0, 0);
1715
1716 up_write(&zram->init_lock);
 1717 /* I/O operations on all CPUs are done, so it is safe to free */
beb6602c 1718 zram_meta_free(zram, disksize);
302128dc 1719 memset(&zram->stats, 0, sizeof(zram->stats));
522698d7 1720 zcomp_destroy(comp);
013bf95a 1721 reset_bdev(zram);
1722}
1723
1724static ssize_t disksize_store(struct device *dev,
1725 struct device_attribute *attr, const char *buf, size_t len)
2f6a3bed 1726{
1727 u64 disksize;
1728 struct zcomp *comp;
2f6a3bed 1729 struct zram *zram = dev_to_zram(dev);
522698d7 1730 int err;
2f6a3bed 1731
1732 disksize = memparse(buf, NULL);
1733 if (!disksize)
1734 return -EINVAL;
2f6a3bed 1735
1736 down_write(&zram->init_lock);
1737 if (init_done(zram)) {
1738 pr_info("Cannot change disksize for initialized device\n");
1739 err = -EBUSY;
1740 goto out_unlock;
1741 }
1742
522698d7 1743 disksize = PAGE_ALIGN(disksize);
1744 if (!zram_meta_alloc(zram, disksize)) {
1745 err = -ENOMEM;
1746 goto out_unlock;
1747 }
522698d7 1748
da9556a2 1749 comp = zcomp_create(zram->compressor);
522698d7 1750 if (IS_ERR(comp)) {
70864969 1751 pr_err("Cannot initialise %s compressing backend\n",
1752 zram->compressor);
1753 err = PTR_ERR(comp);
1754 goto out_free_meta;
1755 }
1756
1757 zram->comp = comp;
1758 zram->disksize = disksize;
1759 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1760
1761 revalidate_disk(zram->disk);
e7ccfc4c 1762 up_write(&zram->init_lock);
1763
1764 return len;
1765
522698d7 1766out_free_meta:
1767 zram_meta_free(zram, disksize);
1768out_unlock:
1769 up_write(&zram->init_lock);
522698d7 1770 return err;
1771}
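
/*
 * Illustrative usage (device name and size assumed): memparse() accepts
 * the usual K/M/G suffixes and the value is rounded up to PAGE_SIZE:
 *
 *	echo 1G > /sys/block/zram0/disksize
 */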
1772
1773static ssize_t reset_store(struct device *dev,
1774 struct device_attribute *attr, const char *buf, size_t len)
4f2109f6 1775{
1776 int ret;
1777 unsigned short do_reset;
1778 struct zram *zram;
1779 struct block_device *bdev;
4f2109f6 1780
1781 ret = kstrtou16(buf, 10, &do_reset);
1782 if (ret)
1783 return ret;
1784
1785 if (!do_reset)
1786 return -EINVAL;
1787
1788 zram = dev_to_zram(dev);
1789 bdev = bdget_disk(zram->disk, 0);
1790 if (!bdev)
1791 return -ENOMEM;
4f2109f6 1792
522698d7 1793 mutex_lock(&bdev->bd_mutex);
1794 /* Do not reset an active device or claimed device */
1795 if (bdev->bd_openers || zram->claim) {
1796 mutex_unlock(&bdev->bd_mutex);
1797 bdput(bdev);
1798 return -EBUSY;
1799 }
1800
 1801 /* From now on, no one can open /dev/zram[0-9] */
1802 zram->claim = true;
1803 mutex_unlock(&bdev->bd_mutex);
522698d7 1804
f405c445 1805 /* Make sure all the pending I/O are finished */
1806 fsync_bdev(bdev);
1807 zram_reset_device(zram);
e447a015 1808 revalidate_disk(zram->disk);
1809 bdput(bdev);
1810
1811 mutex_lock(&bdev->bd_mutex);
1812 zram->claim = false;
1813 mutex_unlock(&bdev->bd_mutex);
1814
522698d7 1815 return len;
1816}
1817
1818static int zram_open(struct block_device *bdev, fmode_t mode)
1819{
1820 int ret = 0;
1821 struct zram *zram;
1822
1823 WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1824
1825 zram = bdev->bd_disk->private_data;
 1826 /* zram was claimed for reset, so the open request fails */
1827 if (zram->claim)
1828 ret = -EBUSY;
1829
1830 return ret;
1831}
1832
522698d7 1833static const struct block_device_operations zram_devops = {
f405c445 1834 .open = zram_open,
1835 .swap_slot_free_notify = zram_slot_free_notify,
1836 .rw_page = zram_rw_page,
1837 .owner = THIS_MODULE
1838};
1839
1840static DEVICE_ATTR_WO(compact);
1841static DEVICE_ATTR_RW(disksize);
1842static DEVICE_ATTR_RO(initstate);
1843static DEVICE_ATTR_WO(reset);
1844static DEVICE_ATTR_WO(mem_limit);
1845static DEVICE_ATTR_WO(mem_used_max);
e82592c4 1846static DEVICE_ATTR_WO(idle);
1847static DEVICE_ATTR_RW(max_comp_streams);
1848static DEVICE_ATTR_RW(comp_algorithm);
1849#ifdef CONFIG_ZRAM_WRITEBACK
1850static DEVICE_ATTR_RW(backing_dev);
a939888e 1851static DEVICE_ATTR_WO(writeback);
bb416d18 1852static DEVICE_ATTR_RW(writeback_limit);
1d69a3f8 1853static DEVICE_ATTR_RW(writeback_limit_enable);
013bf95a 1854#endif
a68eb3b6 1855
1856static struct attribute *zram_disk_attrs[] = {
1857 &dev_attr_disksize.attr,
1858 &dev_attr_initstate.attr,
1859 &dev_attr_reset.attr,
99ebbd30 1860 &dev_attr_compact.attr,
9ada9da9 1861 &dev_attr_mem_limit.attr,
461a8eee 1862 &dev_attr_mem_used_max.attr,
e82592c4 1863 &dev_attr_idle.attr,
beca3ec7 1864 &dev_attr_max_comp_streams.attr,
e46b8a03 1865 &dev_attr_comp_algorithm.attr,
013bf95a
MK
1866#ifdef CONFIG_ZRAM_WRITEBACK
1867 &dev_attr_backing_dev.attr,
a939888e 1868 &dev_attr_writeback.attr,
bb416d18 1869 &dev_attr_writeback_limit.attr,
1d69a3f8 1870 &dev_attr_writeback_limit_enable.attr,
013bf95a 1871#endif
2f6a3bed 1872 &dev_attr_io_stat.attr,
4f2109f6 1873 &dev_attr_mm_stat.attr,
23eddf39
MK
1874#ifdef CONFIG_ZRAM_WRITEBACK
1875 &dev_attr_bd_stat.attr,
1876#endif
623e47fc 1877 &dev_attr_debug_stat.attr,
9b3bb7ab
SS
1878 NULL,
1879};
1880
bc1bb362 1881static const struct attribute_group zram_disk_attr_group = {
9b3bb7ab
SS
1882 .attrs = zram_disk_attrs,
1883};
1884
98af4d4d
HR
1885static const struct attribute_group *zram_disk_attr_groups[] = {
1886 &zram_disk_attr_group,
1887 NULL,
1888};
1889
92ff1528
SS
1890/*
 1891 * Allocate and initialize a new zram device. The function returns a
 1892 * '>= 0' device_id upon success, and a negative value otherwise.
1893 */
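/*
 * (Within this file, zram_add() is called from zram_init(), which pre-creates
 * num_devices instances at module load, and from hot_add_show() below.)
 */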
1894static int zram_add(void)
306b0c95 1895{
85508ec6 1896 struct zram *zram;
ee980160 1897 struct request_queue *queue;
92ff1528 1898 int ret, device_id;
85508ec6
SS
1899
1900 zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1901 if (!zram)
1902 return -ENOMEM;
1903
92ff1528 1904 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
85508ec6
SS
1905 if (ret < 0)
1906 goto out_free_dev;
92ff1528 1907 device_id = ret;
de1a21a0 1908
0900beae 1909 init_rwsem(&zram->init_lock);
1d69a3f8
MK
1910#ifdef CONFIG_ZRAM_WRITEBACK
1911 spin_lock_init(&zram->wb_limit_lock);
1912#endif
ee980160
SS
1913 queue = blk_alloc_queue(GFP_KERNEL);
1914 if (!queue) {
306b0c95
NG
1915 pr_err("Error allocating disk queue for device %d\n",
1916 device_id);
85508ec6
SS
1917 ret = -ENOMEM;
1918 goto out_free_idr;
306b0c95
NG
1919 }
1920
ee980160 1921 blk_queue_make_request(queue, zram_make_request);
306b0c95 1922
85508ec6 1923 /* gendisk structure */
f1e3cfff
NG
1924 zram->disk = alloc_disk(1);
1925 if (!zram->disk) {
70864969 1926 pr_err("Error allocating disk structure for device %d\n",
306b0c95 1927 device_id);
201c7b72 1928 ret = -ENOMEM;
39a9b8ac 1929 goto out_free_queue;
306b0c95
NG
1930 }
1931
f1e3cfff
NG
1932 zram->disk->major = zram_major;
1933 zram->disk->first_minor = device_id;
1934 zram->disk->fops = &zram_devops;
ee980160
SS
1935 zram->disk->queue = queue;
1936 zram->disk->queue->queuedata = zram;
f1e3cfff
NG
1937 zram->disk->private_data = zram;
1938 snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
306b0c95 1939
33863c21 1940	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
f1e3cfff 1941 set_capacity(zram->disk, 0);
b67d1ec1 1942	/* zram devices sort of resemble non-rotational disks */
8b904b5b
BVA
1943 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1944 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
e447a015 1945
a1dd52af
NG
1946 /*
1947 * To ensure that we always get PAGE_SIZE aligned
 1948	 * and n*PAGE_SIZE sized I/O requests.
1949 */
f1e3cfff 1950 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
7b19b8d4
RJ
1951 blk_queue_logical_block_size(zram->disk->queue,
1952 ZRAM_LOGICAL_BLOCK_SIZE);
f1e3cfff
NG
1953 blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1954 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
f4659d8e 1955 zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
2bb4cd5c 1956 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
8b904b5b 1957 blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
31edeacd 1958
f4659d8e
JK
1959 /*
1960 * zram_bio_discard() will clear all logical blocks if logical block
 1961	 * size is identical to the physical block size (PAGE_SIZE). But if they
 1962	 * differ, we will skip discarding the parts of logical blocks that fall
 1963	 * in the portion of the request range which isn't aligned to the
 1964	 * physical block size, so we can't ensure that all discarded logical
 1965	 * blocks are zeroed.
1966 */
1967 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
31edeacd 1968 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
5d83d5a0 1969
e447a015 1970 zram->disk->queue->backing_dev_info->capabilities |=
23c47d2a 1971 (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
98af4d4d
HR
1972 device_add_disk(NULL, zram->disk, zram_disk_attr_groups);
1973
e46b8a03 1974 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
d12b63c9 1975
c0265342 1976 zram_debugfs_register(zram);
d12b63c9 1977 pr_info("Added device: %s\n", zram->disk->disk_name);
92ff1528 1978 return device_id;
de1a21a0 1979
39a9b8ac 1980out_free_queue:
ee980160 1981 blk_cleanup_queue(queue);
85508ec6
SS
1982out_free_idr:
1983 idr_remove(&zram_index_idr, device_id);
1984out_free_dev:
1985 kfree(zram);
de1a21a0 1986 return ret;
306b0c95
NG
1987}
1988
6566d1a3 1989static int zram_remove(struct zram *zram)
306b0c95 1990{
6566d1a3
SS
1991 struct block_device *bdev;
1992
1993 bdev = bdget_disk(zram->disk, 0);
1994 if (!bdev)
1995 return -ENOMEM;
1996
1997 mutex_lock(&bdev->bd_mutex);
1998 if (bdev->bd_openers || zram->claim) {
1999 mutex_unlock(&bdev->bd_mutex);
2000 bdput(bdev);
2001 return -EBUSY;
2002 }
2003
2004 zram->claim = true;
2005 mutex_unlock(&bdev->bd_mutex);
2006
c0265342 2007 zram_debugfs_unregister(zram);
306b0c95 2008
6566d1a3
SS
 2009	/* Make sure all pending I/O is finished */
2010 fsync_bdev(bdev);
85508ec6 2011 zram_reset_device(zram);
6566d1a3
SS
2012 bdput(bdev);
2013
2014 pr_info("Removed device: %s\n", zram->disk->disk_name);
2015
85508ec6 2016 del_gendisk(zram->disk);
392db380 2017 blk_cleanup_queue(zram->disk->queue);
85508ec6
SS
2018 put_disk(zram->disk);
2019 kfree(zram);
6566d1a3
SS
2020 return 0;
2021}
2022
2023/* zram-control sysfs attributes */
27104a53
GKH
2024
2025/*
 2026 * NOTE: the hot_add attribute is not the usual read-only sysfs attribute,
 2027 * in the sense that reading from this file does alter the state of your
 2028 * system -- it creates a new un-initialized zram device and returns that
 2029 * device's device_id (or an error code if it fails to create a new device).
2030 */
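/*
 * Usage sketch (zram-control sysfs ABI):
 *   cat /sys/class/zram-control/hot_add    # prints the new id, e.g. "1"
 */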
6566d1a3
SS
2031static ssize_t hot_add_show(struct class *class,
2032 struct class_attribute *attr,
2033 char *buf)
2034{
2035 int ret;
2036
2037 mutex_lock(&zram_index_mutex);
2038 ret = zram_add();
2039 mutex_unlock(&zram_index_mutex);
2040
2041 if (ret < 0)
2042 return ret;
2043 return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
2044}
f40609d1 2045static CLASS_ATTR_RO(hot_add);
6566d1a3
SS
2046
2047static ssize_t hot_remove_store(struct class *class,
2048 struct class_attribute *attr,
2049 const char *buf,
2050 size_t count)
2051{
2052 struct zram *zram;
2053 int ret, dev_id;
2054
2055 /* dev_id is gendisk->first_minor, which is `int' */
2056 ret = kstrtoint(buf, 10, &dev_id);
2057 if (ret)
2058 return ret;
2059 if (dev_id < 0)
2060 return -EINVAL;
2061
2062 mutex_lock(&zram_index_mutex);
2063
2064 zram = idr_find(&zram_index_idr, dev_id);
17ec4cd9 2065 if (zram) {
6566d1a3 2066 ret = zram_remove(zram);
529e71e1
TI
2067 if (!ret)
2068 idr_remove(&zram_index_idr, dev_id);
17ec4cd9 2069 } else {
6566d1a3 2070 ret = -ENODEV;
17ec4cd9 2071 }
6566d1a3
SS
2072
2073 mutex_unlock(&zram_index_mutex);
2074 return ret ? ret : count;
85508ec6 2075}
27104a53 2076static CLASS_ATTR_WO(hot_remove);
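/*
 * Usage sketch (zram-control sysfs ABI): an idle, unclaimed device can be
 * removed by writing its id, e.g.
 *   echo 1 > /sys/class/zram-control/hot_remove
 */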
a096cafc 2077
27104a53
GKH
2078static struct attribute *zram_control_class_attrs[] = {
2079 &class_attr_hot_add.attr,
2080 &class_attr_hot_remove.attr,
2081 NULL,
6566d1a3 2082};
27104a53 2083ATTRIBUTE_GROUPS(zram_control_class);
6566d1a3
SS
2084
2085static struct class zram_control_class = {
2086 .name = "zram-control",
2087 .owner = THIS_MODULE,
27104a53 2088 .class_groups = zram_control_class_groups,
6566d1a3
SS
2089};
2090
85508ec6
SS
2091static int zram_remove_cb(int id, void *ptr, void *data)
2092{
2093 zram_remove(ptr);
2094 return 0;
2095}
a096cafc 2096
85508ec6
SS
2097static void destroy_devices(void)
2098{
6566d1a3 2099 class_unregister(&zram_control_class);
85508ec6 2100 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
c0265342 2101 zram_debugfs_destroy();
85508ec6 2102 idr_destroy(&zram_index_idr);
a096cafc 2103 unregister_blkdev(zram_major, "zram");
1dd6c834 2104 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
306b0c95
NG
2105}
2106
f1e3cfff 2107static int __init zram_init(void)
306b0c95 2108{
92ff1528 2109 int ret;
306b0c95 2110
1dd6c834
AMG
2111 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
2112 zcomp_cpu_up_prepare, zcomp_cpu_dead);
2113 if (ret < 0)
2114 return ret;
2115
6566d1a3
SS
2116 ret = class_register(&zram_control_class);
2117 if (ret) {
70864969 2118 pr_err("Unable to register zram-control class\n");
1dd6c834 2119 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
6566d1a3
SS
2120 return ret;
2121 }
2122
c0265342 2123 zram_debugfs_create();
f1e3cfff
NG
2124 zram_major = register_blkdev(0, "zram");
2125 if (zram_major <= 0) {
70864969 2126 pr_err("Unable to get major number\n");
6566d1a3 2127 class_unregister(&zram_control_class);
1dd6c834 2128 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
a096cafc 2129 return -EBUSY;
306b0c95
NG
2130 }
2131
92ff1528 2132 while (num_devices != 0) {
6566d1a3 2133 mutex_lock(&zram_index_mutex);
92ff1528 2134 ret = zram_add();
6566d1a3 2135 mutex_unlock(&zram_index_mutex);
92ff1528 2136 if (ret < 0)
a096cafc 2137 goto out_error;
92ff1528 2138 num_devices--;
de1a21a0
NG
2139 }
2140
306b0c95 2141 return 0;
de1a21a0 2142
a096cafc 2143out_error:
85508ec6 2144 destroy_devices();
306b0c95
NG
2145 return ret;
2146}
2147
f1e3cfff 2148static void __exit zram_exit(void)
306b0c95 2149{
85508ec6 2150 destroy_devices();
306b0c95
NG
2151}
2152
f1e3cfff
NG
2153module_init(zram_init);
2154module_exit(zram_exit);
306b0c95 2155
9b3bb7ab 2156module_param(num_devices, uint, 0);
c3cdb40e 2157MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
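/*
 * Usage sketch: pre-create several devices at load time, e.g.
 *   modprobe zram num_devices=4
 * Further devices can then be added at runtime via hot_add (see above).
 */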
9b3bb7ab 2158
306b0c95
NG
2159MODULE_LICENSE("Dual BSD/GPL");
2160MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
f1e3cfff 2161MODULE_DESCRIPTION("Compressed RAM Block Device");