drivers/block/zram/zram_drv.c
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #ifdef CONFIG_ZRAM_DEBUG
19 #define DEBUG
20 #endif
21
22 #include <linux/module.h>
23 #include <linux/kernel.h>
24 #include <linux/bio.h>
25 #include <linux/bitops.h>
26 #include <linux/blkdev.h>
27 #include <linux/buffer_head.h>
28 #include <linux/device.h>
29 #include <linux/genhd.h>
30 #include <linux/highmem.h>
31 #include <linux/slab.h>
32 #include <linux/string.h>
33 #include <linux/vmalloc.h>
34 #include <linux/err.h>
35
36 #include "zram_drv.h"
37
38 /* Globals */
39 static int zram_major;
40 static struct zram *zram_devices;
41 static const char *default_compressor = "lzo";
42
43 /* Module params (documentation at end) */
44 static unsigned int num_devices = 1;
45
46 static inline void deprecated_attr_warn(const char *name)
47 {
48         pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
49                         task_pid_nr(current),
50                         current->comm,
51                         name,
52                         "See zram documentation.");
53 }
54
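/*
 * Define a deprecated, read-only sysfs attribute whose show() handler
 * prints a single 64-bit counter from zram->stats and warns on access.
 */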
55 #define ZRAM_ATTR_RO(name)                                              \
56 static ssize_t name##_show(struct device *d,            \
57                                 struct device_attribute *attr, char *b) \
58 {                                                                       \
59         struct zram *zram = dev_to_zram(d);                             \
60                                                                         \
61         deprecated_attr_warn(__stringify(name));                        \
62         return scnprintf(b, PAGE_SIZE, "%llu\n",                        \
63                 (u64)atomic64_read(&zram->stats.name));                 \
64 }                                                                       \
65 static DEVICE_ATTR_RO(name);
66
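/* A device counts as initialized once a non-zero disksize has been set. */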
67 static inline bool init_done(struct zram *zram)
68 {
69         return zram->disksize;
70 }
71
72 static inline struct zram *dev_to_zram(struct device *dev)
73 {
74         return (struct zram *)dev_to_disk(dev)->private_data;
75 }
76
77 static ssize_t disksize_show(struct device *dev,
78                 struct device_attribute *attr, char *buf)
79 {
80         struct zram *zram = dev_to_zram(dev);
81
82         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
83 }
84
85 static ssize_t initstate_show(struct device *dev,
86                 struct device_attribute *attr, char *buf)
87 {
88         u32 val;
89         struct zram *zram = dev_to_zram(dev);
90
91         down_read(&zram->init_lock);
92         val = init_done(zram);
93         up_read(&zram->init_lock);
94
95         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
96 }
97
98 static ssize_t orig_data_size_show(struct device *dev,
99                 struct device_attribute *attr, char *buf)
100 {
101         struct zram *zram = dev_to_zram(dev);
102
103         deprecated_attr_warn("orig_data_size");
104         return scnprintf(buf, PAGE_SIZE, "%llu\n",
105                 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
106 }
107
108 static ssize_t mem_used_total_show(struct device *dev,
109                 struct device_attribute *attr, char *buf)
110 {
111         u64 val = 0;
112         struct zram *zram = dev_to_zram(dev);
113
114         deprecated_attr_warn("mem_used_total");
115         down_read(&zram->init_lock);
116         if (init_done(zram)) {
117                 struct zram_meta *meta = zram->meta;
118                 val = zs_get_total_pages(meta->mem_pool);
119         }
120         up_read(&zram->init_lock);
121
122         return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
123 }
124
125 static ssize_t max_comp_streams_show(struct device *dev,
126                 struct device_attribute *attr, char *buf)
127 {
128         int val;
129         struct zram *zram = dev_to_zram(dev);
130
131         down_read(&zram->init_lock);
132         val = zram->max_comp_streams;
133         up_read(&zram->init_lock);
134
135         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
136 }
137
138 static ssize_t mem_limit_show(struct device *dev,
139                 struct device_attribute *attr, char *buf)
140 {
141         u64 val;
142         struct zram *zram = dev_to_zram(dev);
143
144         deprecated_attr_warn("mem_limit");
145         down_read(&zram->init_lock);
146         val = zram->limit_pages;
147         up_read(&zram->init_lock);
148
149         return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
150 }
151
152 static ssize_t mem_limit_store(struct device *dev,
153                 struct device_attribute *attr, const char *buf, size_t len)
154 {
155         u64 limit;
156         char *tmp;
157         struct zram *zram = dev_to_zram(dev);
158
159         limit = memparse(buf, &tmp);
160         if (buf == tmp) /* no chars parsed, invalid input */
161                 return -EINVAL;
162
163         down_write(&zram->init_lock);
164         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
165         up_write(&zram->init_lock);
166
167         return len;
168 }
169
170 static ssize_t mem_used_max_show(struct device *dev,
171                 struct device_attribute *attr, char *buf)
172 {
173         u64 val = 0;
174         struct zram *zram = dev_to_zram(dev);
175
176         deprecated_attr_warn("mem_used_max");
177         down_read(&zram->init_lock);
178         if (init_done(zram))
179                 val = atomic_long_read(&zram->stats.max_used_pages);
180         up_read(&zram->init_lock);
181
182         return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
183 }
184
185 static ssize_t mem_used_max_store(struct device *dev,
186                 struct device_attribute *attr, const char *buf, size_t len)
187 {
188         int err;
189         unsigned long val;
190         struct zram *zram = dev_to_zram(dev);
191
192         err = kstrtoul(buf, 10, &val);
193         if (err || val != 0)
194                 return -EINVAL;
195
196         down_read(&zram->init_lock);
197         if (init_done(zram)) {
198                 struct zram_meta *meta = zram->meta;
199                 atomic_long_set(&zram->stats.max_used_pages,
200                                 zs_get_total_pages(meta->mem_pool));
201         }
202         up_read(&zram->init_lock);
203
204         return len;
205 }
206
207 static ssize_t max_comp_streams_store(struct device *dev,
208                 struct device_attribute *attr, const char *buf, size_t len)
209 {
210         int num;
211         struct zram *zram = dev_to_zram(dev);
212         int ret;
213
214         ret = kstrtoint(buf, 0, &num);
215         if (ret < 0)
216                 return ret;
217         if (num < 1)
218                 return -EINVAL;
219
220         down_write(&zram->init_lock);
221         if (init_done(zram)) {
222                 if (!zcomp_set_max_streams(zram->comp, num)) {
223                         pr_info("Cannot change max compression streams\n");
224                         ret = -EINVAL;
225                         goto out;
226                 }
227         }
228
229         zram->max_comp_streams = num;
230         ret = len;
231 out:
232         up_write(&zram->init_lock);
233         return ret;
234 }
235
236 static ssize_t comp_algorithm_show(struct device *dev,
237                 struct device_attribute *attr, char *buf)
238 {
239         size_t sz;
240         struct zram *zram = dev_to_zram(dev);
241
242         down_read(&zram->init_lock);
243         sz = zcomp_available_show(zram->compressor, buf);
244         up_read(&zram->init_lock);
245
246         return sz;
247 }
248
249 static ssize_t comp_algorithm_store(struct device *dev,
250                 struct device_attribute *attr, const char *buf, size_t len)
251 {
252         struct zram *zram = dev_to_zram(dev);
253         down_write(&zram->init_lock);
254         if (init_done(zram)) {
255                 up_write(&zram->init_lock);
256                 pr_info("Can't change algorithm for initialized device\n");
257                 return -EBUSY;
258         }
259         strlcpy(zram->compressor, buf, sizeof(zram->compressor));
260         up_write(&zram->init_lock);
261         return len;
262 }
263
264 /* flag operations need to hold the table entry's bit_spinlock */
265 static int zram_test_flag(struct zram_meta *meta, u32 index,
266                         enum zram_pageflags flag)
267 {
268         return meta->table[index].value & BIT(flag);
269 }
270
271 static void zram_set_flag(struct zram_meta *meta, u32 index,
272                         enum zram_pageflags flag)
273 {
274         meta->table[index].value |= BIT(flag);
275 }
276
277 static void zram_clear_flag(struct zram_meta *meta, u32 index,
278                         enum zram_pageflags flag)
279 {
280         meta->table[index].value &= ~BIT(flag);
281 }
282
283 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
284 {
285         return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
286 }
287
288 static void zram_set_obj_size(struct zram_meta *meta,
289                                         u32 index, size_t size)
290 {
291         unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
292
293         meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
294 }
295
296 static inline int is_partial_io(struct bio_vec *bvec)
297 {
298         return bvec->bv_len != PAGE_SIZE;
299 }
300
301 /*
302  * Check if request is within bounds and aligned on zram logical blocks.
303  */
304 static inline int valid_io_request(struct zram *zram,
305                 sector_t start, unsigned int size)
306 {
307         u64 end, bound;
308
309         /* unaligned request */
310         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
311                 return 0;
312         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
313                 return 0;
314
315         end = start + (size >> SECTOR_SHIFT);
316         bound = zram->disksize >> SECTOR_SHIFT;
317         /* out of range */
318         if (unlikely(start >= bound || end > bound || start > end))
319                 return 0;
320
321         /* I/O request is valid */
322         return 1;
323 }
324
325 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
326 {
327         size_t num_pages = disksize >> PAGE_SHIFT;
328         size_t index;
329
330         /* Free all pages that are still in this zram device */
331         for (index = 0; index < num_pages; index++) {
332                 unsigned long handle = meta->table[index].handle;
333
334                 if (!handle)
335                         continue;
336
337                 zs_free(meta->mem_pool, handle);
338         }
339
340         zs_destroy_pool(meta->mem_pool);
341         vfree(meta->table);
342         kfree(meta);
343 }
344
345 static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
346 {
347         size_t num_pages;
348         char pool_name[8];
349         struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
350
351         if (!meta)
352                 return NULL;
353
354         num_pages = disksize >> PAGE_SHIFT;
355         meta->table = vzalloc(num_pages * sizeof(*meta->table));
356         if (!meta->table) {
357                 pr_err("Error allocating zram address table\n");
358                 goto out_error;
359         }
360
361         snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
362         meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
363         if (!meta->mem_pool) {
364                 pr_err("Error creating memory pool\n");
365                 goto out_error;
366         }
367
368         return meta;
369
370 out_error:
371         vfree(meta->table);
372         kfree(meta);
373         return NULL;
374 }
375
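/*
 * Pin the device metadata for I/O.  Fails (returns false) once the
 * refcount has dropped to zero, i.e. while the device is being reset.
 */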
376 static inline bool zram_meta_get(struct zram *zram)
377 {
378         if (atomic_inc_not_zero(&zram->refcount))
379                 return true;
380         return false;
381 }
382
383 static inline void zram_meta_put(struct zram *zram)
384 {
385         atomic_dec(&zram->refcount);
386 }
387
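/* Advance the page index and intra-page offset past the processed bio vector. */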
388 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
389 {
390         if (*offset + bvec->bv_len >= PAGE_SIZE)
391                 (*index)++;
392         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
393 }
394
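/* Return 1 if the page at @ptr contains only zero bytes, 0 otherwise. */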
395 static int page_zero_filled(void *ptr)
396 {
397         unsigned int pos;
398         unsigned long *page;
399
400         page = (unsigned long *)ptr;
401
402         for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
403                 if (page[pos])
404                         return 0;
405         }
406
407         return 1;
408 }
409
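/* Complete a read of a zero-filled page by zeroing the caller's buffer. */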
410 static void handle_zero_page(struct bio_vec *bvec)
411 {
412         struct page *page = bvec->bv_page;
413         void *user_mem;
414
415         user_mem = kmap_atomic(page);
416         if (is_partial_io(bvec))
417                 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
418         else
419                 clear_page(user_mem);
420         kunmap_atomic(user_mem);
421
422         flush_dcache_page(page);
423 }
424
425
426 /*
427  * To protect concurrent access to the same index entry,
428  * the caller should hold this table index entry's bit_spinlock to
429  * indicate that the entry is being accessed.
430  */
431 static void zram_free_page(struct zram *zram, size_t index)
432 {
433         struct zram_meta *meta = zram->meta;
434         unsigned long handle = meta->table[index].handle;
435
436         if (unlikely(!handle)) {
437                 /*
438                  * No memory is allocated for zero filled pages.
439                  * Simply clear zero page flag.
440                  */
441                 if (zram_test_flag(meta, index, ZRAM_ZERO)) {
442                         zram_clear_flag(meta, index, ZRAM_ZERO);
443                         atomic64_dec(&zram->stats.zero_pages);
444                 }
445                 return;
446         }
447
448         zs_free(meta->mem_pool, handle);
449
450         atomic64_sub(zram_get_obj_size(meta, index),
451                         &zram->stats.compr_data_size);
452         atomic64_dec(&zram->stats.pages_stored);
453
454         meta->table[index].handle = 0;
455         zram_set_obj_size(meta, index, 0);
456 }
457
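/*
 * Decompress the object stored at @index into @mem, which must be a full
 * PAGE_SIZE buffer.  Unallocated and zero-filled slots read back as zeroes.
 */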
458 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
459 {
460         int ret = 0;
461         unsigned char *cmem;
462         struct zram_meta *meta = zram->meta;
463         unsigned long handle;
464         size_t size;
465
466         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
467         handle = meta->table[index].handle;
468         size = zram_get_obj_size(meta, index);
469
470         if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
471                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
472                 clear_page(mem);
473                 return 0;
474         }
475
476         cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
477         if (size == PAGE_SIZE)
478                 copy_page(mem, cmem);
479         else
480                 ret = zcomp_decompress(zram->comp, cmem, size, mem);
481         zs_unmap_object(meta->mem_pool, handle);
482         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
483
484         /* Should NEVER happen. Return bio error if it does. */
485         if (unlikely(ret)) {
486                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
487                 return ret;
488         }
489
490         return 0;
491 }
492
493 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
494                           u32 index, int offset)
495 {
496         int ret;
497         struct page *page;
498         unsigned char *user_mem, *uncmem = NULL;
499         struct zram_meta *meta = zram->meta;
500         page = bvec->bv_page;
501
502         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
503         if (unlikely(!meta->table[index].handle) ||
504                         zram_test_flag(meta, index, ZRAM_ZERO)) {
505                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
506                 handle_zero_page(bvec);
507                 return 0;
508         }
509         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
510
511         if (is_partial_io(bvec))
512                 /* Use a temporary buffer to decompress the page */
513                 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
514
515         user_mem = kmap_atomic(page);
516         if (!is_partial_io(bvec))
517                 uncmem = user_mem;
518
519         if (!uncmem) {
520                 pr_info("Unable to allocate temp memory\n");
521                 ret = -ENOMEM;
522                 goto out_cleanup;
523         }
524
525         ret = zram_decompress_page(zram, uncmem, index);
526         /* Should NEVER happen. Return bio error if it does. */
527         if (unlikely(ret))
528                 goto out_cleanup;
529
530         if (is_partial_io(bvec))
531                 memcpy(user_mem + bvec->bv_offset, uncmem + offset,
532                                 bvec->bv_len);
533
534         flush_dcache_page(page);
535         ret = 0;
536 out_cleanup:
537         kunmap_atomic(user_mem);
538         if (is_partial_io(bvec))
539                 kfree(uncmem);
540         return ret;
541 }
542
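/* Raise stats.max_used_pages to @pages if it grew, via a lock-free cmpxchg loop. */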
543 static inline void update_used_max(struct zram *zram,
544                                         const unsigned long pages)
545 {
546         unsigned long old_max, cur_max;
547
548         old_max = atomic_long_read(&zram->stats.max_used_pages);
549
550         do {
551                 cur_max = old_max;
552                 if (pages > cur_max)
553                         old_max = atomic_long_cmpxchg(
554                                 &zram->stats.max_used_pages, cur_max, pages);
555         } while (old_max != cur_max);
556 }
557
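/*
 * Compress and store a single bio vector at @index/@offset.  Zero-filled
 * pages are only flagged (ZRAM_ZERO) and consume no zsmalloc memory.
 */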
558 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
559                            int offset)
560 {
561         int ret = 0;
562         size_t clen;
563         unsigned long handle;
564         struct page *page;
565         unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
566         struct zram_meta *meta = zram->meta;
567         struct zcomp_strm *zstrm;
568         bool locked = false;
569         unsigned long alloced_pages;
570
571         page = bvec->bv_page;
572         if (is_partial_io(bvec)) {
573                 /*
574                  * This is a partial I/O. We need to read the full page
575                  * before writing the changes.
576                  */
577                 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
578                 if (!uncmem) {
579                         ret = -ENOMEM;
580                         goto out;
581                 }
582                 ret = zram_decompress_page(zram, uncmem, index);
583                 if (ret)
584                         goto out;
585         }
586
587         zstrm = zcomp_strm_find(zram->comp);
588         locked = true;
589         user_mem = kmap_atomic(page);
590
591         if (is_partial_io(bvec)) {
592                 memcpy(uncmem + offset, user_mem + bvec->bv_offset,
593                        bvec->bv_len);
594                 kunmap_atomic(user_mem);
595                 user_mem = NULL;
596         } else {
597                 uncmem = user_mem;
598         }
599
600         if (page_zero_filled(uncmem)) {
601                 if (user_mem)
602                         kunmap_atomic(user_mem);
603                 /* Free memory associated with this sector now. */
604                 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
605                 zram_free_page(zram, index);
606                 zram_set_flag(meta, index, ZRAM_ZERO);
607                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
608
609                 atomic64_inc(&zram->stats.zero_pages);
610                 ret = 0;
611                 goto out;
612         }
613
614         ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
615         if (!is_partial_io(bvec)) {
616                 kunmap_atomic(user_mem);
617                 user_mem = NULL;
618                 uncmem = NULL;
619         }
620
621         if (unlikely(ret)) {
622                 pr_err("Compression failed! err=%d\n", ret);
623                 goto out;
624         }
625         src = zstrm->buffer;
626         if (unlikely(clen > max_zpage_size)) {
627                 clen = PAGE_SIZE;
628                 if (is_partial_io(bvec))
629                         src = uncmem;
630         }
631
632         handle = zs_malloc(meta->mem_pool, clen);
633         if (!handle) {
634                 pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
635                         index, clen);
636                 ret = -ENOMEM;
637                 goto out;
638         }
639
640         alloced_pages = zs_get_total_pages(meta->mem_pool);
641         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
642                 zs_free(meta->mem_pool, handle);
643                 ret = -ENOMEM;
644                 goto out;
645         }
646
647         update_used_max(zram, alloced_pages);
648
649         cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
650
651         if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
652                 src = kmap_atomic(page);
653                 copy_page(cmem, src);
654                 kunmap_atomic(src);
655         } else {
656                 memcpy(cmem, src, clen);
657         }
658
659         zcomp_strm_release(zram->comp, zstrm);
660         locked = false;
661         zs_unmap_object(meta->mem_pool, handle);
662
663         /*
664          * Free memory associated with this sector
665          * before overwriting unused sectors.
666          */
667         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
668         zram_free_page(zram, index);
669
670         meta->table[index].handle = handle;
671         zram_set_obj_size(meta, index, clen);
672         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
673
674         /* Update stats */
675         atomic64_add(clen, &zram->stats.compr_data_size);
676         atomic64_inc(&zram->stats.pages_stored);
677 out:
678         if (locked)
679                 zcomp_strm_release(zram->comp, zstrm);
680         if (is_partial_io(bvec))
681                 kfree(uncmem);
682         return ret;
683 }
684
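/* Dispatch one bio vector to the read or write path and account the I/O. */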
685 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
686                         int offset, int rw)
687 {
688         unsigned long start_time = jiffies;
689         int ret;
690
691         generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
692                         &zram->disk->part0);
693
694         if (rw == READ) {
695                 atomic64_inc(&zram->stats.num_reads);
696                 ret = zram_bvec_read(zram, bvec, index, offset);
697         } else {
698                 atomic64_inc(&zram->stats.num_writes);
699                 ret = zram_bvec_write(zram, bvec, index, offset);
700         }
701
702         generic_end_io_acct(rw, &zram->disk->part0, start_time);
703
704         if (unlikely(ret)) {
705                 if (rw == READ)
706                         atomic64_inc(&zram->stats.failed_reads);
707                 else
708                         atomic64_inc(&zram->stats.failed_writes);
709         }
710
711         return ret;
712 }
713
714 /*
715  * zram_bio_discard - handle a discard request
716  * @index: physical block index in PAGE_SIZE units
717  * @offset: byte offset within physical block
718  */
719 static void zram_bio_discard(struct zram *zram, u32 index,
720                              int offset, struct bio *bio)
721 {
722         size_t n = bio->bi_iter.bi_size;
723         struct zram_meta *meta = zram->meta;
724
725         /*
726          * zram manages data in physical block size units. Because logical block
727          * size isn't identical to the physical block size on some architectures,
728          * we could get a discard request pointing to a specific offset within a
729          * certain physical block.  Although we could handle such a request by
730          * reading that physical block, decompressing it, partially zeroing it,
731          * re-compressing it and then re-storing it, this isn't reasonable
732          * because our intent with a discard request is to save memory.  So
733          * skipping this logical block is appropriate here.
734          */
735         if (offset) {
736                 if (n <= (PAGE_SIZE - offset))
737                         return;
738
739                 n -= (PAGE_SIZE - offset);
740                 index++;
741         }
742
743         while (n >= PAGE_SIZE) {
744                 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
745                 zram_free_page(zram, index);
746                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
747                 atomic64_inc(&zram->stats.notify_free);
748                 index++;
749                 n -= PAGE_SIZE;
750         }
751 }
752
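/*
 * Return the device to its uninitialized state: wait for in-flight I/O to
 * drain, reset statistics and free the metadata and compression backend.
 */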
753 static void zram_reset_device(struct zram *zram)
754 {
755         struct zram_meta *meta;
756         struct zcomp *comp;
757         u64 disksize;
758
759         down_write(&zram->init_lock);
760
761         zram->limit_pages = 0;
762
763         if (!init_done(zram)) {
764                 up_write(&zram->init_lock);
765                 return;
766         }
767
768         meta = zram->meta;
769         comp = zram->comp;
770         disksize = zram->disksize;
771         /*
772          * The refcount will eventually drop to 0, and the r/w handler
773          * cannot accept further I/O: it will bail out on the
774          * zram_meta_get() check.
775          */
776         zram_meta_put(zram);
777         /*
778          * We want to free zram_meta in process context to avoid
779          * deadlock between reclaim path and any other locks.
780          */
781         wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
782
783         /* Reset stats */
784         memset(&zram->stats, 0, sizeof(zram->stats));
785         zram->disksize = 0;
786         zram->max_comp_streams = 1;
787         set_capacity(zram->disk, 0);
788
789         up_write(&zram->init_lock);
790         /* I/O operations on all CPUs are done, so it is safe to free */
791         zram_meta_free(meta, disksize);
792         zcomp_destroy(comp);
793 }
794
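/*
 * Set the device capacity.  Allocates the address table and compression
 * backend before publishing the new disksize under init_lock.
 */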
795 static ssize_t disksize_store(struct device *dev,
796                 struct device_attribute *attr, const char *buf, size_t len)
797 {
798         u64 disksize;
799         struct zcomp *comp;
800         struct zram_meta *meta;
801         struct zram *zram = dev_to_zram(dev);
802         int err;
803
804         disksize = memparse(buf, NULL);
805         if (!disksize)
806                 return -EINVAL;
807
808         disksize = PAGE_ALIGN(disksize);
809         meta = zram_meta_alloc(zram->disk->first_minor, disksize);
810         if (!meta)
811                 return -ENOMEM;
812
813         comp = zcomp_create(zram->compressor, zram->max_comp_streams);
814         if (IS_ERR(comp)) {
815                 pr_info("Cannot initialise %s compressing backend\n",
816                                 zram->compressor);
817                 err = PTR_ERR(comp);
818                 goto out_free_meta;
819         }
820
821         down_write(&zram->init_lock);
822         if (init_done(zram)) {
823                 pr_info("Cannot change disksize for initialized device\n");
824                 err = -EBUSY;
825                 goto out_destroy_comp;
826         }
827
828         init_waitqueue_head(&zram->io_done);
829         atomic_set(&zram->refcount, 1);
830         zram->meta = meta;
831         zram->comp = comp;
832         zram->disksize = disksize;
833         set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
834         up_write(&zram->init_lock);
835
836         /*
837          * Revalidate the disk outside of init_lock to avoid a lockdep splat.
838          * This is okay because the disk's capacity is protected by init_lock,
839          * so revalidate_disk always sees an up-to-date capacity.
840          */
841         revalidate_disk(zram->disk);
842
843         return len;
844
845 out_destroy_comp:
846         up_write(&zram->init_lock);
847         zcomp_destroy(comp);
848 out_free_meta:
849         zram_meta_free(meta, disksize);
850         return err;
851 }
852
853 static ssize_t reset_store(struct device *dev,
854                 struct device_attribute *attr, const char *buf, size_t len)
855 {
856         int ret;
857         unsigned short do_reset;
858         struct zram *zram;
859         struct block_device *bdev;
860
861         zram = dev_to_zram(dev);
862         bdev = bdget_disk(zram->disk, 0);
863
864         if (!bdev)
865                 return -ENOMEM;
866
867         mutex_lock(&bdev->bd_mutex);
868         /* Do not reset an active device! */
869         if (bdev->bd_openers) {
870                 ret = -EBUSY;
871                 goto out;
872         }
873
874         ret = kstrtou16(buf, 10, &do_reset);
875         if (ret)
876                 goto out;
877
878         if (!do_reset) {
879                 ret = -EINVAL;
880                 goto out;
881         }
882
883         /* Make sure all pending I/O is finished */
884         fsync_bdev(bdev);
885         zram_reset_device(zram);
886
887         mutex_unlock(&bdev->bd_mutex);
888         revalidate_disk(zram->disk);
889         bdput(bdev);
890
891         return len;
892
893 out:
894         mutex_unlock(&bdev->bd_mutex);
895         bdput(bdev);
896         return ret;
897 }
898
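/*
 * Walk the bio segment by segment, splitting any vector that would cross
 * a zram page boundary, and hand each piece to zram_bvec_rw().
 */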
899 static void __zram_make_request(struct zram *zram, struct bio *bio)
900 {
901         int offset, rw;
902         u32 index;
903         struct bio_vec bvec;
904         struct bvec_iter iter;
905
906         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
907         offset = (bio->bi_iter.bi_sector &
908                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
909
910         if (unlikely(bio->bi_rw & REQ_DISCARD)) {
911                 zram_bio_discard(zram, index, offset, bio);
912                 bio_endio(bio, 0);
913                 return;
914         }
915
916         rw = bio_data_dir(bio);
917         bio_for_each_segment(bvec, bio, iter) {
918                 int max_transfer_size = PAGE_SIZE - offset;
919
920                 if (bvec.bv_len > max_transfer_size) {
921                         /*
922                          * zram_bvec_rw() can only operate on a single
923                          * zram page. Split the bio vector.
924                          */
925                         struct bio_vec bv;
926
927                         bv.bv_page = bvec.bv_page;
928                         bv.bv_len = max_transfer_size;
929                         bv.bv_offset = bvec.bv_offset;
930
931                         if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
932                                 goto out;
933
934                         bv.bv_len = bvec.bv_len - max_transfer_size;
935                         bv.bv_offset += max_transfer_size;
936                         if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
937                                 goto out;
938                 } else
939                         if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
940                                 goto out;
941
942                 update_position(&index, &offset, &bvec);
943         }
944
945         set_bit(BIO_UPTODATE, &bio->bi_flags);
946         bio_endio(bio, 0);
947         return;
948
949 out:
950         bio_io_error(bio);
951 }
952
953 /*
954  * Handler function for all zram I/O requests.
955  */
956 static void zram_make_request(struct request_queue *queue, struct bio *bio)
957 {
958         struct zram *zram = queue->queuedata;
959
960         if (unlikely(!zram_meta_get(zram)))
961                 goto error;
962
963         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
964                                         bio->bi_iter.bi_size)) {
965                 atomic64_inc(&zram->stats.invalid_io);
966                 goto put_zram;
967         }
968
969         __zram_make_request(zram, bio);
970         zram_meta_put(zram);
971         return;
972 put_zram:
973         zram_meta_put(zram);
974 error:
975         bio_io_error(bio);
976 }
977
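/*
 * Called by the swap layer when a swap slot is freed, so the backing
 * compressed object can be released immediately.
 */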
978 static void zram_slot_free_notify(struct block_device *bdev,
979                                 unsigned long index)
980 {
981         struct zram *zram;
982         struct zram_meta *meta;
983
984         zram = bdev->bd_disk->private_data;
985         meta = zram->meta;
986
987         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
988         zram_free_page(zram, index);
989         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
990         atomic64_inc(&zram->stats.notify_free);
991 }
992
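/* rw_page hook: synchronously read or write one page without building a bio. */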
993 static int zram_rw_page(struct block_device *bdev, sector_t sector,
994                        struct page *page, int rw)
995 {
996         int offset, err = -EIO;
997         u32 index;
998         struct zram *zram;
999         struct bio_vec bv;
1000
1001         zram = bdev->bd_disk->private_data;
1002         if (unlikely(!zram_meta_get(zram)))
1003                 goto out;
1004
1005         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1006                 atomic64_inc(&zram->stats.invalid_io);
1007                 err = -EINVAL;
1008                 goto put_zram;
1009         }
1010
1011         index = sector >> SECTORS_PER_PAGE_SHIFT;
1012         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1013
1014         bv.bv_page = page;
1015         bv.bv_len = PAGE_SIZE;
1016         bv.bv_offset = 0;
1017
1018         err = zram_bvec_rw(zram, &bv, index, offset, rw);
1019 put_zram:
1020         zram_meta_put(zram);
1021 out:
1022         /*
1023          * If the I/O fails, just return an error (i.e. non-zero) without
1024          * calling page_endio.
1025          * This causes the upper callers of rw_page (e.g., swap_readpage,
1026          * __swap_writepage) to resubmit the I/O as a bio request, and
1027          * bio->bi_end_io then handles the error
1028          * (e.g., SetPageError, set_page_dirty and other work).
1029          */
1030         if (err == 0)
1031                 page_endio(page, rw, 0);
1032         return err;
1033 }
1034
1035 static const struct block_device_operations zram_devops = {
1036         .swap_slot_free_notify = zram_slot_free_notify,
1037         .rw_page = zram_rw_page,
1038         .owner = THIS_MODULE
1039 };
1040
1041 static DEVICE_ATTR_RW(disksize);
1042 static DEVICE_ATTR_RO(initstate);
1043 static DEVICE_ATTR_WO(reset);
1044 static DEVICE_ATTR_RO(orig_data_size);
1045 static DEVICE_ATTR_RO(mem_used_total);
1046 static DEVICE_ATTR_RW(mem_limit);
1047 static DEVICE_ATTR_RW(mem_used_max);
1048 static DEVICE_ATTR_RW(max_comp_streams);
1049 static DEVICE_ATTR_RW(comp_algorithm);
1050
1051 static ssize_t io_stat_show(struct device *dev,
1052                 struct device_attribute *attr, char *buf)
1053 {
1054         struct zram *zram = dev_to_zram(dev);
1055         ssize_t ret;
1056
1057         down_read(&zram->init_lock);
1058         ret = scnprintf(buf, PAGE_SIZE,
1059                         "%8llu %8llu %8llu %8llu\n",
1060                         (u64)atomic64_read(&zram->stats.failed_reads),
1061                         (u64)atomic64_read(&zram->stats.failed_writes),
1062                         (u64)atomic64_read(&zram->stats.invalid_io),
1063                         (u64)atomic64_read(&zram->stats.notify_free));
1064         up_read(&zram->init_lock);
1065
1066         return ret;
1067 }
1068
1069 static ssize_t mm_stat_show(struct device *dev,
1070                 struct device_attribute *attr, char *buf)
1071 {
1072         struct zram *zram = dev_to_zram(dev);
1073         u64 orig_size, mem_used = 0;
1074         long max_used;
1075         ssize_t ret;
1076
1077         down_read(&zram->init_lock);
1078         if (init_done(zram))
1079                 mem_used = zs_get_total_pages(zram->meta->mem_pool);
1080
1081         orig_size = atomic64_read(&zram->stats.pages_stored);
1082         max_used = atomic_long_read(&zram->stats.max_used_pages);
1083
1084         ret = scnprintf(buf, PAGE_SIZE,
1085                         "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
1086                         orig_size << PAGE_SHIFT,
1087                         (u64)atomic64_read(&zram->stats.compr_data_size),
1088                         mem_used << PAGE_SHIFT,
1089                         zram->limit_pages << PAGE_SHIFT,
1090                         max_used << PAGE_SHIFT,
1091                         (u64)atomic64_read(&zram->stats.zero_pages),
1092                         (u64)atomic64_read(&zram->stats.num_migrated));
1093         up_read(&zram->init_lock);
1094
1095         return ret;
1096 }
1097
1098 static DEVICE_ATTR_RO(io_stat);
1099 static DEVICE_ATTR_RO(mm_stat);
1100 ZRAM_ATTR_RO(num_reads);
1101 ZRAM_ATTR_RO(num_writes);
1102 ZRAM_ATTR_RO(failed_reads);
1103 ZRAM_ATTR_RO(failed_writes);
1104 ZRAM_ATTR_RO(invalid_io);
1105 ZRAM_ATTR_RO(notify_free);
1106 ZRAM_ATTR_RO(zero_pages);
1107 ZRAM_ATTR_RO(compr_data_size);
1108
1109 static struct attribute *zram_disk_attrs[] = {
1110         &dev_attr_disksize.attr,
1111         &dev_attr_initstate.attr,
1112         &dev_attr_reset.attr,
1113         &dev_attr_num_reads.attr,
1114         &dev_attr_num_writes.attr,
1115         &dev_attr_failed_reads.attr,
1116         &dev_attr_failed_writes.attr,
1117         &dev_attr_invalid_io.attr,
1118         &dev_attr_notify_free.attr,
1119         &dev_attr_zero_pages.attr,
1120         &dev_attr_orig_data_size.attr,
1121         &dev_attr_compr_data_size.attr,
1122         &dev_attr_mem_used_total.attr,
1123         &dev_attr_mem_limit.attr,
1124         &dev_attr_mem_used_max.attr,
1125         &dev_attr_max_comp_streams.attr,
1126         &dev_attr_comp_algorithm.attr,
1127         &dev_attr_io_stat.attr,
1128         &dev_attr_mm_stat.attr,
1129         NULL,
1130 };
1131
1132 static struct attribute_group zram_disk_attr_group = {
1133         .attrs = zram_disk_attrs,
1134 };
1135
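/* Allocate and register the request queue, gendisk and sysfs attributes for one device. */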
1136 static int create_device(struct zram *zram, int device_id)
1137 {
1138         struct request_queue *queue;
1139         int ret = -ENOMEM;
1140
1141         init_rwsem(&zram->init_lock);
1142
1143         queue = blk_alloc_queue(GFP_KERNEL);
1144         if (!queue) {
1145                 pr_err("Error allocating disk queue for device %d\n",
1146                         device_id);
1147                 goto out;
1148         }
1149
1150         blk_queue_make_request(queue, zram_make_request);
1151
1152          /* gendisk structure */
1153         zram->disk = alloc_disk(1);
1154         if (!zram->disk) {
1155                 pr_warn("Error allocating disk structure for device %d\n",
1156                         device_id);
1157                 ret = -ENOMEM;
1158                 goto out_free_queue;
1159         }
1160
1161         zram->disk->major = zram_major;
1162         zram->disk->first_minor = device_id;
1163         zram->disk->fops = &zram_devops;
1164         zram->disk->queue = queue;
1165         zram->disk->queue->queuedata = zram;
1166         zram->disk->private_data = zram;
1167         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1168
1169         /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
1170         set_capacity(zram->disk, 0);
1171         /* zram devices sort of resemble non-rotational disks */
1172         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
1173         queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1174         /*
1175          * To ensure that we always get PAGE_SIZE-aligned
1176          * and n*PAGE_SIZE-sized I/O requests.
1177          */
1178         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1179         blk_queue_logical_block_size(zram->disk->queue,
1180                                         ZRAM_LOGICAL_BLOCK_SIZE);
1181         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1182         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1183         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1184         zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
1185         /*
1186          * zram_bio_discard() will clear all logical blocks if logical block
1187          * size is identical to the physical block size (PAGE_SIZE). But if it is
1188          * different, we will skip discarding some parts of logical blocks in
1189          * the part of the request range which isn't aligned to physical block
1190          * size.  So we can't ensure that all discarded logical blocks are
1191          * zeroed.
1192          */
1193         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1194                 zram->disk->queue->limits.discard_zeroes_data = 1;
1195         else
1196                 zram->disk->queue->limits.discard_zeroes_data = 0;
1197         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
1198
1199         add_disk(zram->disk);
1200
1201         ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1202                                 &zram_disk_attr_group);
1203         if (ret < 0) {
1204                 pr_warn("Error creating sysfs group\n");
1205                 goto out_free_disk;
1206         }
1207         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1208         zram->meta = NULL;
1209         zram->max_comp_streams = 1;
1210         return 0;
1211
1212 out_free_disk:
1213         del_gendisk(zram->disk);
1214         put_disk(zram->disk);
1215 out_free_queue:
1216         blk_cleanup_queue(queue);
1217 out:
1218         return ret;
1219 }
1220
1221 static void destroy_devices(unsigned int nr)
1222 {
1223         struct zram *zram;
1224         unsigned int i;
1225
1226         for (i = 0; i < nr; i++) {
1227                 zram = &zram_devices[i];
1228                 /*
1229                  * Remove sysfs first, so no one will perform a disksize
1230                  * store while we destroy the devices
1231                  */
1232                 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1233                                 &zram_disk_attr_group);
1234
1235                 zram_reset_device(zram);
1236
1237                 blk_cleanup_queue(zram->disk->queue);
1238                 del_gendisk(zram->disk);
1239                 put_disk(zram->disk);
1240         }
1241
1242         kfree(zram_devices);
1243         unregister_blkdev(zram_major, "zram");
1244         pr_info("Destroyed %u device(s)\n", nr);
1245 }
1246
1247 static int __init zram_init(void)
1248 {
1249         int ret, dev_id;
1250
1251         if (num_devices > max_num_devices) {
1252                 pr_warn("Invalid value for num_devices: %u\n",
1253                                 num_devices);
1254                 return -EINVAL;
1255         }
1256
1257         zram_major = register_blkdev(0, "zram");
1258         if (zram_major <= 0) {
1259                 pr_warn("Unable to get major number\n");
1260                 return -EBUSY;
1261         }
1262
1263         /* Allocate the device array and initialize each one */
1264         zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
1265         if (!zram_devices) {
1266                 unregister_blkdev(zram_major, "zram");
1267                 return -ENOMEM;
1268         }
1269
1270         for (dev_id = 0; dev_id < num_devices; dev_id++) {
1271                 ret = create_device(&zram_devices[dev_id], dev_id);
1272                 if (ret)
1273                         goto out_error;
1274         }
1275
1276         pr_info("Created %u device(s)\n", num_devices);
1277         return 0;
1278
1279 out_error:
1280         destroy_devices(dev_id);
1281         return ret;
1282 }
1283
1284 static void __exit zram_exit(void)
1285 {
1286         destroy_devices(num_devices);
1287 }
1288
1289 module_init(zram_init);
1290 module_exit(zram_exit);
1291
1292 module_param(num_devices, uint, 0);
1293 MODULE_PARM_DESC(num_devices, "Number of zram devices");
1294
1295 MODULE_LICENSE("Dual BSD/GPL");
1296 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1297 MODULE_DESCRIPTION("Compressed RAM Block Device");
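/*
 * Typical user-space setup (illustrative only; device names and sizes are
 * examples, not mandated by this driver):
 *
 *   modprobe zram num_devices=1
 *   echo lzo > /sys/block/zram0/comp_algorithm    (lzo is also the default)
 *   echo 512M > /sys/block/zram0/disksize
 *   mkswap /dev/zram0 && swapon /dev/zram0
 */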