Merge tag 'mm-stable-2023-04-27-15-30' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] drivers/block/zram/zram_drv.c
index aa490da3cef233409e2b85db33a7f8c88d3cba29..a84c4268257a996f6750ad185577050bc9ccf9dc 100644
@@ -54,9 +54,8 @@ static size_t huge_class_size;
 static const struct block_device_operations zram_devops;
 
 static void zram_free_page(struct zram *zram, size_t index);
-static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
-                               u32 index, int offset, struct bio *bio);
-
+static int zram_read_page(struct zram *zram, struct page *page, u32 index,
+                         struct bio *parent);
 
 static int zram_slot_trylock(struct zram *zram, u32 index)
 {
@@ -148,6 +147,7 @@ static inline bool is_partial_io(struct bio_vec *bvec)
 {
        return bvec->bv_len != PAGE_SIZE;
 }
+#define ZRAM_PARTIAL_IO                1
 #else
 static inline bool is_partial_io(struct bio_vec *bvec)
 {
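
The new ZRAM_PARTIAL_IO define sits in the PAGE_SIZE != 4096 branch next to the real is_partial_io(), so IS_ENABLED(ZRAM_PARTIAL_IO) in read_from_bdev() further down folds to a compile-time 1 or 0 with no #ifdef at the use site. Below is a userspace model of the preprocessor trick behind IS_ENABLED(), simplified from include/linux/kconfig.h to the built-in case (the real macro also tests option##_MODULE):

#include <stdio.h>

/* evaluates to 1 only when `option' is defined to exactly 1, else 0 */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define __is_defined(x)			___is_defined(x)
#define ___is_defined(val)		____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk)	__take_second_arg(arg1_or_junk 1, 0)
#define IS_ENABLED(option)		__is_defined(option)

#define ZRAM_PARTIAL_IO 1	/* as defined above when PAGE_SIZE != 4096 */

int main(void)
{
	printf("%d\n", IS_ENABLED(ZRAM_PARTIAL_IO));	/* prints 1 */
	return 0;
}
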
@@ -174,36 +174,6 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index)
        return prio & ZRAM_COMP_PRIORITY_MASK;
 }
 
-/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline bool valid_io_request(struct zram *zram,
-               sector_t start, unsigned int size)
-{
-       u64 end, bound;
-
-       /* unaligned request */
-       if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
-               return false;
-       if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
-               return false;
-
-       end = start + (size >> SECTOR_SHIFT);
-       bound = zram->disksize >> SECTOR_SHIFT;
-       /* out of range */
-       if (unlikely(start >= bound || end > bound || start > end))
-               return false;
-
-       /* I/O request is valid */
-       return true;
-}
-
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
-{
-       *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
-       *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
-}
-
 static inline void update_used_max(struct zram *zram,
                                        const unsigned long pages)
 {
@@ -606,41 +576,16 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
        atomic64_dec(&zram->stats.bd_count);
 }
 
-static void zram_page_end_io(struct bio *bio)
-{
-       struct page *page = bio_first_page_all(bio);
-
-       page_endio(page, op_is_write(bio_op(bio)),
-                       blk_status_to_errno(bio->bi_status));
-       bio_put(bio);
-}
-
-/*
- * Returns 1 if the submission is successful.
- */
-static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
+static void read_from_bdev_async(struct zram *zram, struct page *page,
                        unsigned long entry, struct bio *parent)
 {
        struct bio *bio;
 
-       bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
-                       GFP_NOIO);
-       if (!bio)
-               return -ENOMEM;
-
+       bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO);
        bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
-       if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
-               bio_put(bio);
-               return -EIO;
-       }
-
-       if (!parent)
-               bio->bi_end_io = zram_page_end_io;
-       else
-               bio_chain(bio, parent);
-
+       __bio_add_page(bio, page, PAGE_SIZE, 0);
+       bio_chain(bio, parent);
        submit_bio(bio);
-       return 1;
 }
 
 #define PAGE_WB_SIG "page_index="
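
The rewritten async path sheds all of its error handling: bio_alloc() with GFP_NOIO may block to reclaim memory but does not return NULL, and __bio_add_page() cannot fail on a freshly allocated bio with a free vec slot. What bio_chain() buys is that the parent bio completes only after the child does. A rough userspace model of that semantic (the mock_* names are invented for illustration, not kernel API):

#include <stdio.h>

struct mock_bio {
	const char *name;
	int remaining;		/* outstanding completions */
};

static void mock_endio(struct mock_bio *bio)
{
	if (--bio->remaining)
		printf("%s: %d completion(s) still pending\n",
		       bio->name, bio->remaining);
	else
		printf("%s: end_io runs\n", bio->name);
}

/* bio_chain(child, parent): the child's completion folds into the parent */
static void mock_chain(struct mock_bio *parent)
{
	parent->remaining++;
}

int main(void)
{
	struct mock_bio parent = { .name = "parent", .remaining = 1 };

	mock_chain(&parent);	/* read_from_bdev_async() chains one child */
	mock_endio(&parent);	/* parent's own I/O done: still pending */
	mock_endio(&parent);	/* child done: parent's end_io finally runs */
	return 0;
}
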
@@ -701,10 +646,6 @@ static ssize_t writeback_store(struct device *dev,
        }
 
        for (; nr_pages != 0; index++, nr_pages--) {
-               struct bio_vec bvec;
-
-               bvec_set_page(&bvec, page, PAGE_SIZE, 0);
-
                spin_lock(&zram->wb_limit_lock);
                if (zram->wb_limit_enable && !zram->bd_wb_limit) {
                        spin_unlock(&zram->wb_limit_lock);
@@ -748,7 +689,7 @@ static ssize_t writeback_store(struct device *dev,
                /* Need for hugepage writeback racing */
                zram_set_flag(zram, index, ZRAM_IDLE);
                zram_slot_unlock(zram, index);
-               if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
+               if (zram_read_page(zram, page, index, NULL)) {
                        zram_slot_lock(zram, index);
                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
                        zram_clear_flag(zram, index, ZRAM_IDLE);
@@ -759,9 +700,8 @@ static ssize_t writeback_store(struct device *dev,
                bio_init(&bio, zram->bdev, &bio_vec, 1,
                         REQ_OP_WRITE | REQ_SYNC);
                bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);
+               bio_add_page(&bio, page, PAGE_SIZE, 0);
 
-               bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
-                               bvec.bv_offset);
                /*
                 * XXX: A single page IO would be inefficient for write
                 * but it would be not bad as starter.
@@ -829,19 +769,20 @@ struct zram_work {
        struct work_struct work;
        struct zram *zram;
        unsigned long entry;
-       struct bio *bio;
-       struct bio_vec bvec;
+       struct page *page;
+       int error;
 };
 
-#if PAGE_SIZE != 4096
 static void zram_sync_read(struct work_struct *work)
 {
        struct zram_work *zw = container_of(work, struct zram_work, work);
-       struct zram *zram = zw->zram;
-       unsigned long entry = zw->entry;
-       struct bio *bio = zw->bio;
+       struct bio_vec bv;
+       struct bio bio;
 
-       read_from_bdev_async(zram, &zw->bvec, entry, bio);
+       bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
+       bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
+       __bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
+       zw->error = submit_bio_wait(&bio);
 }
 
 /*
@@ -849,45 +790,39 @@ static void zram_sync_read(struct work_struct *work)
  * chained IO with parent IO in same context, it's a deadlock. To avoid that,
  * use a worker thread context.
  */
-static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
-                               unsigned long entry, struct bio *bio)
+static int read_from_bdev_sync(struct zram *zram, struct page *page,
+                               unsigned long entry)
 {
        struct zram_work work;
 
-       work.bvec = *bvec;
+       work.page = page;
        work.zram = zram;
        work.entry = entry;
-       work.bio = bio;
 
        INIT_WORK_ONSTACK(&work.work, zram_sync_read);
        queue_work(system_unbound_wq, &work.work);
        flush_work(&work.work);
        destroy_work_on_stack(&work.work);
 
-       return 1;
-}
-#else
-static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
-                               unsigned long entry, struct bio *bio)
-{
-       WARN_ON(1);
-       return -EIO;
+       return work.error;
 }
-#endif
 
-static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
-                       unsigned long entry, struct bio *parent, bool sync)
+static int read_from_bdev(struct zram *zram, struct page *page,
+                       unsigned long entry, struct bio *parent)
 {
        atomic64_inc(&zram->stats.bd_reads);
-       if (sync)
-               return read_from_bdev_sync(zram, bvec, entry, parent);
-       else
-               return read_from_bdev_async(zram, bvec, entry, parent);
+       if (!parent) {
+               if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
+                       return -EIO;
+               return read_from_bdev_sync(zram, page, entry);
+       }
+       read_from_bdev_async(zram, page, entry, parent);
+       return 0;
 }
 #else
 static inline void reset_bdev(struct zram *zram) {};
-static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
-                       unsigned long entry, struct bio *parent, bool sync)
+static int read_from_bdev(struct zram *zram, struct page *page,
+                       unsigned long entry, struct bio *parent)
 {
        return -EIO;
 }
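
zram_sync_read() now performs the read itself with an on-stack bio and submit_bio_wait(), whose negative errno is handed back through work.error instead of the old convention of returning 1 on successful submission. The detour through system_unbound_wq exists only for the loop-driver deadlock described in the comment above; the pattern itself is generic. A kernel-style sketch of it (struct sync_call and run_in_worker() are illustrative names, error handling elided):

/* run fn(arg) in a workqueue context and wait for it to finish */
struct sync_call {
	struct work_struct work;
	void (*fn)(void *arg);
	void *arg;
};

static void sync_call_fn(struct work_struct *work)
{
	struct sync_call *c = container_of(work, struct sync_call, work);

	c->fn(c->arg);
}

static void run_in_worker(void (*fn)(void *arg), void *arg)
{
	struct sync_call c = { .fn = fn, .arg = arg };

	/* the _ONSTACK variants keep lockdep happy for stack work items */
	INIT_WORK_ONSTACK(&c.work, sync_call_fn);
	queue_work(system_unbound_wq, &c.work);
	flush_work(&c.work);
	destroy_work_on_stack(&c.work);
}
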
@@ -1190,10 +1125,9 @@ static ssize_t io_stat_show(struct device *dev,
 
        down_read(&zram->init_lock);
        ret = scnprintf(buf, PAGE_SIZE,
-                       "%8llu %8llu %8llu %8llu\n",
+                       "%8llu %8llu 0 %8llu\n",
                        (u64)atomic64_read(&zram->stats.failed_reads),
                        (u64)atomic64_read(&zram->stats.failed_writes),
-                       (u64)atomic64_read(&zram->stats.invalid_io),
                        (u64)atomic64_read(&zram->stats.notify_free));
        up_read(&zram->init_lock);
 
@@ -1371,20 +1305,6 @@ out:
                ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
 }
 
-/*
- * Reads a page from the writeback devices. Corresponding ZRAM slot
- * should be unlocked.
- */
-static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
-                                   u32 index, struct bio *bio, bool partial_io)
-{
-       struct bio_vec bvec;
-
-       bvec_set_page(&bvec, page, PAGE_SIZE, 0);
-       return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
-                             partial_io);
-}
-
 /*
  * Reads (decompresses if needed) a page from zspool (zsmalloc).
  * Corresponding ZRAM slot should be locked.
@@ -1434,8 +1354,8 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
        return ret;
 }
 
-static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
-                           struct bio *bio, bool partial_io)
+static int zram_read_page(struct zram *zram, struct page *page, u32 index,
+                         struct bio *parent)
 {
        int ret;
 
@@ -1445,11 +1365,14 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
                ret = zram_read_from_zspool(zram, page, index);
                zram_slot_unlock(zram, index);
        } else {
-               /* Slot should be unlocked before the function call */
+               /*
+                * The slot should be unlocked before reading from the backing
+                * device.
+                */
                zram_slot_unlock(zram, index);
 
-               ret = zram_bvec_read_from_bdev(zram, page, index, bio,
-                                              partial_io);
+               ret = read_from_bdev(zram, page, zram_get_element(zram, index),
+                                    parent);
        }
 
        /* Should NEVER happen. Return bio error if it does. */
@@ -1459,39 +1382,34 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
        return ret;
 }
 
-static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
-                         u32 index, int offset, struct bio *bio)
+/*
+ * Use a temporary buffer to decompress the page, as the decompressor
+ * always expects a full page for the output.
+ */
+static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
+                                 u32 index, int offset)
 {
+       struct page *page = alloc_page(GFP_NOIO);
        int ret;
-       struct page *page;
 
-       page = bvec->bv_page;
-       if (is_partial_io(bvec)) {
-               /* Use a temporary buffer to decompress the page */
-               page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
-               if (!page)
-                       return -ENOMEM;
-       }
-
-       ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
-       if (unlikely(ret))
-               goto out;
-
-       if (is_partial_io(bvec)) {
-               void *src = kmap_atomic(page);
+       if (!page)
+               return -ENOMEM;
+       ret = zram_read_page(zram, page, index, NULL);
+       if (likely(!ret))
+               memcpy_to_bvec(bvec, page_address(page) + offset);
+       __free_page(page);
+       return ret;
+}
 
-               memcpy_to_bvec(bvec, src + offset);
-               kunmap_atomic(src);
-       }
-out:
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+                         u32 index, int offset, struct bio *bio)
+{
        if (is_partial_io(bvec))
-               __free_page(page);
-
-       return ret;
+               return zram_bvec_read_partial(zram, bvec, index, offset);
+       return zram_read_page(zram, bvec->bv_page, index, bio);
 }
 
-static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
-                               u32 index, struct bio *bio)
+static int zram_write_page(struct zram *zram, struct page *page, u32 index)
 {
        int ret = 0;
        unsigned long alloced_pages;
@@ -1499,7 +1417,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
        unsigned int comp_len = 0;
        void *src, *dst, *mem;
        struct zcomp_strm *zstrm;
-       struct page *page = bvec->bv_page;
        unsigned long element = 0;
        enum zram_pageflags flags = 0;
 
@@ -1617,40 +1534,33 @@ out:
        return ret;
 }
 
-static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
-                               u32 index, int offset, struct bio *bio)
+/*
+ * This is a partial IO. Read the full page before writing the changes.
+ */
+static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
+                                  u32 index, int offset, struct bio *bio)
 {
+       struct page *page = alloc_page(GFP_NOIO);
        int ret;
-       struct page *page = NULL;
-       struct bio_vec vec;
 
-       vec = *bvec;
-       if (is_partial_io(bvec)) {
-               void *dst;
-               /*
-                * This is a partial IO. We need to read the full page
-                * before to write the changes.
-                */
-               page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
-               if (!page)
-                       return -ENOMEM;
-
-               ret = __zram_bvec_read(zram, page, index, bio, true);
-               if (ret)
-                       goto out;
-
-               dst = kmap_atomic(page);
-               memcpy_from_bvec(dst + offset, bvec);
-               kunmap_atomic(dst);
+       if (!page)
+               return -ENOMEM;
 
-               bvec_set_page(&vec, page, PAGE_SIZE, 0);
+       ret = zram_read_page(zram, page, index, bio);
+       if (!ret) {
+               memcpy_from_bvec(page_address(page) + offset, bvec);
+               ret = zram_write_page(zram, page, index);
        }
+       __free_page(page);
+       return ret;
+}
 
-       ret = __zram_bvec_write(zram, &vec, index, bio);
-out:
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+                          u32 index, int offset, struct bio *bio)
+{
        if (is_partial_io(bvec))
-               __free_page(page);
-       return ret;
+               return zram_bvec_write_partial(zram, bvec, index, offset, bio);
+       return zram_write_page(zram, bvec->bv_page, index);
 }
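
Both partial-IO helpers now share one shape: bounce the whole page through a private buffer, since compression and decompression only operate on full pages. The buffer is addressed with page_address() instead of kmap_atomic() because it is no longer allocated with __GFP_HIGHMEM. A runnable userspace model of the write side, i.e. the read-modify-write in zram_bvec_write_partial() (all names below are stand-ins for the zram helpers):

#include <stdlib.h>
#include <string.h>

#define MODEL_PAGE_SIZE 4096

static char backing_page[MODEL_PAGE_SIZE];	/* the one stored page */

static void page_read(char *dst)  { memcpy(dst, backing_page, MODEL_PAGE_SIZE); }
static void page_write(char *src) { memcpy(backing_page, src, MODEL_PAGE_SIZE); }

static int partial_write(const char *data, size_t offset, size_t len)
{
	char *page = malloc(MODEL_PAGE_SIZE);	/* alloc_page(GFP_NOIO) */

	if (!page)
		return -1;			/* -ENOMEM */
	page_read(page);			/* read the full page first */
	memcpy(page + offset, data, len);	/* memcpy_from_bvec() */
	page_write(page);			/* compress and store it back */
	free(page);				/* __free_page() */
	return 0;
}

int main(void)
{
	return partial_write("xy", 100, 2);	/* patch 2 bytes at offset 100 */
}
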
 
 #ifdef CONFIG_ZRAM_MULTI_COMP
@@ -1761,7 +1671,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 
        /*
         * No direct reclaim (slow path) for handle allocation and no
-        * re-compression attempt (unlike in __zram_bvec_write()) since
+        * re-compression attempt (unlike in zram_write_page()) since
         * we already have stored that object in zsmalloc. If we cannot
         * alloc memory for recompressed object then we bail out and
         * simply keep the old (existing) object in zsmalloc.
@@ -1921,15 +1831,12 @@ release_init_lock:
 }
 #endif
 
-/*
- * zram_bio_discard - handler on discard request
- * @index: physical block index in PAGE_SIZE units
- * @offset: byte offset within physical block
- */
-static void zram_bio_discard(struct zram *zram, u32 index,
-                            int offset, struct bio *bio)
+static void zram_bio_discard(struct zram *zram, struct bio *bio)
 {
        size_t n = bio->bi_iter.bi_size;
+       u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+       u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
+                       SECTOR_SHIFT;
 
        /*
         * zram manages data in physical block size units. Because logical block
@@ -1957,80 +1864,58 @@ static void zram_bio_discard(struct zram *zram, u32 index,
                index++;
                n -= PAGE_SIZE;
        }
+
+       bio_endio(bio);
 }
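
Each op handler now derives the page index and intra-page offset from bi_sector itself, the same computation the read and write loops below repeat per segment; that is what let update_position() and the caller-supplied index/offset pair go away. A runnable worked example of the arithmetic, assuming 4 KiB pages and 512-byte sectors:

#include <stdio.h>

#define SECTOR_SHIFT		9
#define MODEL_PAGE_SIZE		4096
#define SECTORS_PER_PAGE_SHIFT	3	/* log2(4096 / 512) */
#define SECTORS_PER_PAGE	(1 << SECTORS_PER_PAGE_SHIFT)

int main(void)
{
	unsigned long long sector = 13;	/* an unaligned discard start */
	unsigned int index = sector >> SECTORS_PER_PAGE_SHIFT;
	unsigned int offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	/* sector 13 -> page index 1, byte offset 2560 within that page */
	printf("index=%u offset=%u\n", index, offset);

	/* zram_bio_discard() rounds up past the unaligned head: the first
	 * PAGE_SIZE - offset bytes are skipped, freeing starts at index+1 */
	if (offset)
		printf("skip %u bytes, free from index %u\n",
		       MODEL_PAGE_SIZE - offset, index + 1);
	return 0;
}
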
 
-/*
- * Returns errno if it has some problem. Otherwise return 0 or 1.
- * Returns 0 if IO request was done synchronously
- * Returns 1 if IO request was successfully submitted.
- */
-static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
-                       int offset, enum req_op op, struct bio *bio)
+static void zram_bio_read(struct zram *zram, struct bio *bio)
 {
-       int ret;
-
-       if (!op_is_write(op)) {
-               ret = zram_bvec_read(zram, bvec, index, offset, bio);
-               flush_dcache_page(bvec->bv_page);
-       } else {
-               ret = zram_bvec_write(zram, bvec, index, offset, bio);
-       }
+       struct bvec_iter iter;
+       struct bio_vec bv;
+       unsigned long start_time;
 
-       zram_slot_lock(zram, index);
-       zram_accessed(zram, index);
-       zram_slot_unlock(zram, index);
+       start_time = bio_start_io_acct(bio);
+       bio_for_each_segment(bv, bio, iter) {
+               u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+               u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
+                               SECTOR_SHIFT;
 
-       if (unlikely(ret < 0)) {
-               if (!op_is_write(op))
+               if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_reads);
-               else
-                       atomic64_inc(&zram->stats.failed_writes);
-       }
+                       bio->bi_status = BLK_STS_IOERR;
+                       break;
+               }
+               flush_dcache_page(bv.bv_page);
 
-       return ret;
+               zram_slot_lock(zram, index);
+               zram_accessed(zram, index);
+               zram_slot_unlock(zram, index);
+       }
+       bio_end_io_acct(bio, start_time);
+       bio_endio(bio);
 }
 
-static void __zram_make_request(struct zram *zram, struct bio *bio)
+static void zram_bio_write(struct zram *zram, struct bio *bio)
 {
-       int offset;
-       u32 index;
-       struct bio_vec bvec;
        struct bvec_iter iter;
+       struct bio_vec bv;
        unsigned long start_time;
 
-       index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
-       offset = (bio->bi_iter.bi_sector &
-                 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
-
-       switch (bio_op(bio)) {
-       case REQ_OP_DISCARD:
-       case REQ_OP_WRITE_ZEROES:
-               zram_bio_discard(zram, index, offset, bio);
-               bio_endio(bio);
-               return;
-       default:
-               break;
-       }
-
        start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bvec, bio, iter) {
-               struct bio_vec bv = bvec;
-               unsigned int unwritten = bvec.bv_len;
-
-               do {
-                       bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
-                                                       unwritten);
-                       if (zram_bvec_rw(zram, &bv, index, offset,
-                                        bio_op(bio), bio) < 0) {
-                               bio->bi_status = BLK_STS_IOERR;
-                               break;
-                       }
+       bio_for_each_segment(bv, bio, iter) {
+               u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+               u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
+                               SECTOR_SHIFT;
 
-                       bv.bv_offset += bv.bv_len;
-                       unwritten -= bv.bv_len;
+               if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
+                       atomic64_inc(&zram->stats.failed_writes);
+                       bio->bi_status = BLK_STS_IOERR;
+                       break;
+               }
 
-                       update_position(&index, &offset, &bv);
-               } while (unwritten);
+               zram_slot_lock(zram, index);
+               zram_accessed(zram, index);
+               zram_slot_unlock(zram, index);
        }
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
@@ -2043,14 +1928,21 @@ static void zram_submit_bio(struct bio *bio)
 {
        struct zram *zram = bio->bi_bdev->bd_disk->private_data;
 
-       if (!valid_io_request(zram, bio->bi_iter.bi_sector,
-                                       bio->bi_iter.bi_size)) {
-               atomic64_inc(&zram->stats.invalid_io);
-               bio_io_error(bio);
-               return;
+       switch (bio_op(bio)) {
+       case REQ_OP_READ:
+               zram_bio_read(zram, bio);
+               break;
+       case REQ_OP_WRITE:
+               zram_bio_write(zram, bio);
+               break;
+       case REQ_OP_DISCARD:
+       case REQ_OP_WRITE_ZEROES:
+               zram_bio_discard(zram, bio);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               bio_endio(bio);
        }
-
-       __zram_make_request(zram, bio);
 }
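
With valid_io_request() gone, zram_submit_bio() is a pure dispatch on bio_op(): the block core already checks submitted bios against the device size and for logical block alignment before they reach a driver, so zram no longer duplicates that work (the invalid_io counter it fed was likewise dropped from io_stat above). For reference, the removed check modeled in plain C, i.e. what the core now guarantees on entry:

#include <stdbool.h>
#include <stdint.h>

#define SECTOR_SHIFT			9
#define ZRAM_LOGICAL_BLOCK_SIZE		4096
#define ZRAM_SECTOR_PER_LOGICAL_BLOCK	(ZRAM_LOGICAL_BLOCK_SIZE >> SECTOR_SHIFT)

static bool valid_io_request(uint64_t disksize, uint64_t start, uint32_t size)
{
	uint64_t bound = disksize >> SECTOR_SHIFT;

	/* start sector and byte count aligned to the 4K logical block */
	if (start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))
		return false;
	if (size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))
		return false;
	/* whole request inside the device, both sides in 512-byte sectors */
	return start < bound && start + (size >> SECTOR_SHIFT) <= bound;
}

int main(void)
{
	/* 1 MiB device: sector 8, 4096 bytes is valid; sector 9 is not */
	return !(valid_io_request(1 << 20, 8, 4096) &&
		 !valid_io_request(1 << 20, 9, 4096));
}
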
 
 static void zram_slot_free_notify(struct block_device *bdev,
@@ -2424,8 +2316,8 @@ static int zram_remove(struct zram *zram)
  * creates a new un-initialized zram device and returns back this device's
  * device_id (or an error code if it fails to create a new device).
  */
-static ssize_t hot_add_show(struct class *class,
-                       struct class_attribute *attr,
+static ssize_t hot_add_show(const struct class *class,
+                       const struct class_attribute *attr,
                        char *buf)
 {
        int ret;
@@ -2438,11 +2330,12 @@ static ssize_t hot_add_show(struct class *class,
                return ret;
        return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
 }
+/* This attribute must be set to 0400, so CLASS_ATTR_RO() cannot be used */
 static struct class_attribute class_attr_hot_add =
        __ATTR(hot_add, 0400, hot_add_show, NULL);
 
-static ssize_t hot_remove_store(struct class *class,
-                       struct class_attribute *attr,
+static ssize_t hot_remove_store(const struct class *class,
+                       const struct class_attribute *attr,
                        const char *buf,
                        size_t count)
 {
@@ -2481,7 +2374,6 @@ ATTRIBUTE_GROUPS(zram_control_class);
 
 static struct class zram_control_class = {
        .name           = "zram-control",
-       .owner          = THIS_MODULE,
        .class_groups   = zram_control_class_groups,
 };