[linux-block.git] / mm / page_io.c

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95, 
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
#include <linux/blkdev.h>
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <asm/pgtable.h>

/*
 * Build a one-vector bio that targets the swap location of @page.
 *
 * map_swap_page() supplies both the starting position and the backing
 * block device; the position is then shifted left by (PAGE_SHIFT - 9),
 * i.e. multiplied by PAGE_SIZE / 512, before being stored as the bio's
 * start sector.  @end_io is installed as the completion callback and the
 * page (PAGE_SIZE * hpage_nr_pages(page) bytes, offset 0) is attached.
 *
 * Returns the prepared bio, or NULL when bio_alloc() returns NULL.
 */
static struct bio *get_swap_bio(gfp_t gfp_flags,
				struct page *page, bio_end_io_t end_io)
{
	struct block_device *swap_bdev;
	sector_t start;
	struct bio *bio = bio_alloc(gfp_flags, 1);

	if (!bio)
		return NULL;

	start = map_swap_page(page, &swap_bdev);
	bio_set_dev(bio, swap_bdev);
	bio->bi_iter.bi_sector = start << (PAGE_SHIFT - 9);
	bio->bi_end_io = end_io;

	bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
	return bio;
}

/*
 * Completion callback for swap-out bios.
 *
 * On success only the writeback state is ended and the bio reference is
 * dropped.  On failure the page is additionally flagged with PG_error and
 * handled as described below before writeback is ended.
 */
void end_swap_bio_write(struct bio *bio)
{
	struct page *pg = bio_first_page_all(bio);

	if (!bio->bi_status)
		goto done;

	SetPageError(pg);
	/*
	 * The page never made it to swap-space.  Mark it dirty again so
	 * that it is not reclaimed, and shout about it because things
	 * will go BAD (tm) very quickly from here.
	 *
	 * PG_reclaim is cleared as well to avoid
	 * rotate_reclaimable_page().
	 */
	set_page_dirty(pg);
	pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
		 MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
		 (unsigned long long)bio->bi_iter.bi_sector);
	ClearPageReclaim(pg);
done:
	end_page_writeback(pg);
	bio_put(bio);
}

/*
 * After a successful swap-in, give the backing block driver a chance to
 * release its copy of the slot.
 *
 * Nothing happens unless all of the following hold: the page is in the
 * swap cache, the swap area is flagged SWP_BLKDEV, the disk's fops
 * provide a swap_slot_free_notify hook, and __swap_count() of the entry
 * is exactly 1.
 */
static void swap_slot_free_notify(struct page *page)
{
	struct swap_info_struct *si;
	struct gendisk *disk;
	swp_entry_t ent;

	/*
	 * The page is not guaranteed to be in the swap cache: the software
	 * suspend code (at least) runs end_swap_bio_read() on pages that are
	 * not swapcache pages, so PG_swapcache has to be tested before this
	 * optimization is attempted.
	 */
	if (unlikely(!PageSwapCache(page)))
		return;

	si = page_swap_info(page);
	if (!(si->flags & SWP_BLKDEV))
		return;

	/*
	 * Swap slots are freed lazily, on the expectation that the page will
	 * be swapped out again; if it was not redirtied in the meantime the
	 * second write can be skipped.  For real swap storage that is a win:
	 * less I/O, and better wear-leveling when an SSD is used as the swap
	 * device.
	 *
	 * For an in-memory swap device (eg zram), however, it means the data
	 * is held twice: uncompressed in VM-owned memory and compressed in
	 * zram-owned memory.  So release the zram-owned copy and mark the
	 * VM-owned decompressed page *dirty*, so that it gets written out
	 * somewhere again should we later want to reclaim it.
	 */
	disk = si->bdev->bd_disk;
	ent.val = page_private(page);
	if (!disk->fops->swap_slot_free_notify || __swap_count(ent) != 1)
		return;

	SetPageDirty(page);
	disk->fops->swap_slot_free_notify(si->bdev, swp_offset(ent));
}

/*
 * Completion callback for swap-in bios built by swap_readpage().
 *
 * Marks the page up to date (or flags the error), unlocks it, clears
 * bio->bi_private and drops one bio reference.  If bi_private held a task
 * pointer on entry, that task is woken and its reference is released.
 */
static void end_swap_bio_read(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);
	/*
	 * Snapshot the waiter now: bi_private is cleared and a bio reference
	 * is dropped below, before the waiter is used.
	 */
	struct task_struct *waiter = bio->bi_private;

	if (bio->bi_status) {
		SetPageError(page);
		ClearPageUptodate(page);
		pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
			 MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
			 (unsigned long long)bio->bi_iter.bi_sector);
		goto out;
	}

	SetPageUptodate(page);
	swap_slot_free_notify(page);
out:
	unlock_page(page);
	/*
	 * A NULL bi_private is the completion signal that the synchronous
	 * wait loop in swap_readpage() polls for with READ_ONCE().
	 */
	WRITE_ONCE(bio->bi_private, NULL);
	bio_put(bio);
	if (waiter) {
		blk_wake_io_task(waiter);
		/* Pairs with get_task_struct(current) in swap_readpage(). */
		put_task_struct(waiter);
	}
}

/*
 * Build the swap extent list for a swap file by probing it with bmap().
 *
 * The file is walked from block 0.  Every run of blocks that is exactly
 * PAGE_SIZE long, PAGE_SIZE aligned on disk and contiguous is registered
 * as a one-page extent through add_swap_extent().  Runs that are
 * misaligned or discontiguous are skipped by advancing the probe position
 * one block at a time.
 *
 * On success, *span is set to 1 + (highest - lowest) on-disk page number
 * seen, ignoring the first (header) page, and sis->max, sis->pages and
 * sis->highest_bit are updated from the number of pages found.
 *
 * Returns the sum of the add_swap_extent() return values on success,
 * -EINVAL if bmap() reports block 0 (treated as a hole), or the negative
 * value returned by add_swap_extent().
 */
int generic_swapfile_activate(struct swap_info_struct *sis,
				struct file *swap_file,
				sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;	/* all-ones: any real block is lower */
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent tree.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	/* File size in whole filesystem blocks. */
	last_block = i_size_read(inode) >> blkbits;
	/* Stop when less than one page of blocks remains or sis->max is hit. */
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		/* A zero result from bmap() is treated as a hole in the file. */
		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* The remaining blocks of the page must follow first_block. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		/* Convert from filesystem-block units to page units. */
		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		/* Landing pad for the skip paths above; just re-test the loop. */
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	/* One page fewer than found: the first page is the header. */
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	ret = -EINVAL;
	goto out;
}

/*
 * Write out a swap cache page.
 *
 * Stale swap cache pages may still be sitting in memory: detect them here
 * and skip the needless final write.  If frontswap accepts the page, the
 * writeback state is set and immediately ended without any device I/O.
 * Otherwise the page is handed to __swap_writepage() with
 * end_swap_bio_write() as the completion callback.
 *
 * Returns 0 for the two short-circuit cases, else whatever
 * __swap_writepage() returns.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
	if (try_to_free_swap(page)) {
		unlock_page(page);
		return 0;
	}

	if (!frontswap_store(page)) {
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
		return 0;
	}

	return __swap_writepage(page, wbc, end_swap_bio_write);
}

static sector_t swap_page_sector(struct page *page)
{
	return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
}

/*
 * Account a swap-out: PSWPOUT is bumped by hpage_nr_pages(page), and with
 * CONFIG_TRANSPARENT_HUGEPAGE a PageTransHuge() page additionally counts
 * one THP_SWPOUT event.
 */
static inline void count_swpout_vm_event(struct page *page)
{
	const int nr_pages = hpage_nr_pages(page);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (unlikely(PageTransHuge(page)))
		count_vm_event(THP_SWPOUT);
#endif
	count_vm_events(PSWPOUT, nr_pages);
}

/*
 * Write a swap cache page to its backing store.
 *
 * One of three paths is taken:
 *  1. SWP_FS swap areas: a synchronous kiocb is issued through the swap
 *     file's ->direct_IO() and writeback is ended here.
 *  2. Otherwise bdev_write_page() is tried; if it returns 0 only the
 *     swap-out statistics are updated here.
 *  3. Otherwise a bio is built with @end_write_func as its completion
 *     callback and submitted.
 *
 * Returns 0 on success (or once the bio has been submitted), -ENOMEM if
 * the bio could not be allocated, or the ->direct_IO() return value when
 * that is not PAGE_SIZE.
 */
int __swap_writepage(struct page *page, struct writeback_control *wbc,
		bio_end_io_t end_write_func)
{
	struct bio *bio;
	int ret;
	struct swap_info_struct *sis = page_swap_info(page);

	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	if (sis->flags & SWP_FS) {
		struct kiocb kiocb;
		struct file *swap_file = sis->swap_file;
		struct address_space *mapping = swap_file->f_mapping;
		/* Describe the single page as a one-element bvec iterator. */
		struct bio_vec bv = {
			.bv_page = page,
			.bv_len  = PAGE_SIZE,
			.bv_offset = 0
		};
		struct iov_iter from;

		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
		init_sync_kiocb(&kiocb, swap_file);
		kiocb.ki_pos = page_file_offset(page);

		set_page_writeback(page);
		unlock_page(page);
		ret = mapping->a_ops->direct_IO(&kiocb, &from);
		/* Only a full-page write counts as success. */
		if (ret == PAGE_SIZE) {
			count_vm_event(PSWPOUT);
			ret = 0;
		} else {
			/*
			 * In the case of swap-over-nfs, this can be a
			 * temporary failure if the system has limited
			 * memory for allocating transmit buffers.
			 * Mark the page dirty and avoid
			 * rotate_reclaimable_page but rate-limit the
			 * messages but do not flag PageError like
			 * the normal direct-to-bio case as it could
			 * be temporary.
			 */
			set_page_dirty(page);
			ClearPageReclaim(page);
			pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
					   page_file_offset(page));
		}
		end_page_writeback(page);
		return ret;
	}

	/*
	 * NOTE(review): this path neither sets writeback nor unlocks the page
	 * here; presumably bdev_write_page() does so on success - confirm.
	 */
	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
	if (!ret) {
		count_swpout_vm_event(page);
		return 0;
	}

	/* bdev_write_page() did not take the page: fall back to a bio. */
	ret = 0;
	bio = get_swap_bio(GFP_NOIO, page, end_write_func);
	if (bio == NULL) {
		/* Keep the page dirty so the write is not lost. */
		set_page_dirty(page);
		unlock_page(page);
		ret = -ENOMEM;
		goto out;
	}
	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
	bio_associate_blkg_from_page(bio, page);
	count_swpout_vm_event(page);
	set_page_writeback(page);
	unlock_page(page);
	submit_bio(bio);
out:
	return ret;
}

/*
 * Read a page in from swap.
 *
 * The page must be locked and not yet up to date on entry.  Sources are
 * tried in order: frontswap, the swap file's ->readpage() for SWP_FS
 * areas, bdev_read_page(), and finally a bio completed by
 * end_swap_bio_read().
 *
 * With @synchronous set on the bio path, the caller polls/sleeps until
 * end_swap_bio_read() has cleared bio->bi_private.
 *
 * Returns 0 on success (or once the bio has been submitted), -ENOMEM if
 * the bio could not be allocated, or the ->readpage() error for SWP_FS.
 */
int swap_readpage(struct page *page, bool synchronous)
{
	struct bio *bio;
	int ret = 0;
	struct swap_info_struct *sis = page_swap_info(page);
	blk_qc_t qc;
	struct gendisk *disk;
	unsigned long pflags;

	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
	VM_BUG_ON_PAGE(PageUptodate(page), page);

	/*
	 * Count submission time as memory stall. When the device is congested,
	 * or the submitting cgroup IO-throttled, submission can be a
	 * significant part of overall IO time.
	 */
	psi_memstall_enter(&pflags);

	if (frontswap_load(page) == 0) {
		SetPageUptodate(page);
		unlock_page(page);
		goto out;
	}

	if (sis->flags & SWP_FS) {
		struct file *swap_file = sis->swap_file;
		struct address_space *mapping = swap_file->f_mapping;

		ret = mapping->a_ops->readpage(swap_file, page);
		if (!ret)
			count_vm_event(PSWPIN);
		goto out;
	}

	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
	if (!ret) {
		/*
		 * Best effort only: the slot-free notification is skipped if
		 * the page lock cannot be taken without blocking.
		 */
		if (trylock_page(page)) {
			swap_slot_free_notify(page);
			unlock_page(page);
		}

		count_vm_event(PSWPIN);
		goto out;
	}

	/* bdev_read_page() did not take the page: fall back to a bio. */
	ret = 0;
	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
	if (bio == NULL) {
		unlock_page(page);
		ret = -ENOMEM;
		goto out;
	}
	/* Remember the disk for the blk_poll() calls in the wait loop below. */
	disk = bio->bi_disk;
	bio_set_op_attrs(bio, REQ_OP_READ, 0);
	if (synchronous) {
		bio->bi_opf |= REQ_HIPRI;
		/*
		 * Keep this task valid during swap readpage because the oom
		 * killer may attempt to access it in the page fault retry
		 * time check.  The reference is dropped by
		 * end_swap_bio_read().
		 */
		get_task_struct(current);
		bio->bi_private = current;
	}
	count_vm_event(PSWPIN);
	/*
	 * Extra reference: end_swap_bio_read() does a bio_put() of its own,
	 * yet bio->bi_private is still read in the loop below.
	 */
	bio_get(bio);
	qc = submit_bio(bio);
	while (synchronous) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* end_swap_bio_read() clears bi_private when the read is done. */
		if (!READ_ONCE(bio->bi_private))
			break;

		if (!blk_poll(disk->queue, qc, true))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);
	bio_put(bio);

out:
	psi_memstall_leave(&pflags);
	return ret;
}

/*
 * Mark a swap page dirty.
 *
 * For SWP_FS swap areas the request is forwarded to the swap file
 * mapping's ->set_page_dirty(); every other swap area uses
 * __set_page_dirty_no_writeback().  Returns whatever the chosen helper
 * returns.
 */
int swap_set_page_dirty(struct page *page)
{
	struct swap_info_struct *si = page_swap_info(page);
	struct address_space *mapping;

	if (!(si->flags & SWP_FS))
		return __set_page_dirty_no_writeback(page);

	mapping = si->swap_file->f_mapping;

	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
	return mapping->a_ops->set_page_dirty(page);
}
Commit	Line	Data
b2441318	1	// SPDX-License-Identifier: GPL-2.0
1da177e4 LT	2	/*
	3	* linux/mm/page_io.c
	4	*
	5	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
	6	*
	7	* Swap reorganised 29.12.95,
	8	* Asynchronous swapping added 30.12.95. Stephen Tweedie
	9	* Removed race in async swapping. 14.4.1996. Bruno Haible
	10	* Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
	11	* Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
	12	*/
	13
	14	#include <linux/mm.h>
	15	#include <linux/kernel_stat.h>
5a0e3ad6	16	#include <linux/gfp.h>
1da177e4 LT	17	#include <linux/pagemap.h>
	18	#include <linux/swap.h>
	19	#include <linux/bio.h>
	20	#include <linux/swapops.h>
62c230bc	21	#include <linux/buffer_head.h>
1da177e4	22	#include <linux/writeback.h>
38b5faf4	23	#include <linux/frontswap.h>
b430e9d1	24	#include <linux/blkdev.h>
93779069	25	#include <linux/psi.h>
e2e40f2c	26	#include <linux/uio.h>
b0ba2d0f	27	#include <linux/sched/task.h>
1da177e4 LT	28	#include <asm/pgtable.h>
1da177e4 LT	29
f29ad6a9	30	static struct bio *get_swap_bio(gfp_t gfp_flags,
1da177e4 LT	31	struct page *page, bio_end_io_t end_io)
	32	{
	33	struct bio *bio;
	34
1a5f439c	35	bio = bio_alloc(gfp_flags, 1);
1da177e4	36	if (bio) {
74d46992 CH	37	struct block_device *bdev;
	38
	39	bio->bi_iter.bi_sector = map_swap_page(page, &bdev);
	40	bio_set_dev(bio, bdev);
4f024f37	41	bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
1da177e4	42	bio->bi_end_io = end_io;
6cf66b4c	43
1a5f439c	44	bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
1da177e4 LT	45	}
	46	return bio;
	47	}
	48
4246a0b6	49	void end_swap_bio_write(struct bio *bio)
1da177e4	50	{
263663cd	51	struct page *page = bio_first_page_all(bio);
1da177e4	52
4e4cbee9	53	if (bio->bi_status) {
1da177e4	54	SetPageError(page);
6ddab3b9 PZ	55	/*
	56	* We failed to write the page out to swap-space.
	57	* Re-dirty the page in order to avoid it being reclaimed.
	58	* Also print a dire warning that things will go BAD (tm)
	59	* very quickly.
	60	*
	61	* Also clear PG_reclaim to avoid rotate_reclaimable_page()
	62	*/
	63	set_page_dirty(page);
1170532b	64	pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
74d46992	65	MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
1170532b	66	(unsigned long long)bio->bi_iter.bi_sector);
6ddab3b9 PZ	67	ClearPageReclaim(page);
6ddab3b9 PZ	68	}
1da177e4 LT	69	end_page_writeback(page);
1da177e4 LT	70	bio_put(bio);
1da177e4 LT	71	}
1da177e4 LT	72
3f2b1a04 MK	73	static void swap_slot_free_notify(struct page *page)
	74	{
	75	struct swap_info_struct *sis;
	76	struct gendisk *disk;
5df373e9	77	swp_entry_t entry;
3f2b1a04 MK	78
	79	/*
	80	* There is no guarantee that the page is in swap cache - the software
	81	* suspend code (at least) uses end_swap_bio_read() against a non-
	82	* swapcache page. So we must check PG_swapcache before proceeding with
	83	* this optimization.
	84	*/
	85	if (unlikely(!PageSwapCache(page)))
	86	return;
	87
	88	sis = page_swap_info(page);
	89	if (!(sis->flags & SWP_BLKDEV))
	90	return;
	91
	92	/*
	93	* The swap subsystem performs lazy swap slot freeing,
	94	* expecting that the page will be swapped out again.
	95	* So we can avoid an unnecessary write if the page
	96	* isn't redirtied.
	97	* This is good for real swap storage because we can
	98	* reduce unnecessary I/O and enhance wear-leveling
	99	* if an SSD is used as the as swap device.
	100	* But if in-memory swap device (eg zram) is used,
	101	* this causes a duplicated copy between uncompressed
	102	* data in VM-owned memory and compressed data in
	103	* zram-owned memory. So let's free zram-owned memory
	104	* and make the VM-owned decompressed page dirty,
	105	* so the page should be swapped out somewhere again if
	106	* we again wish to reclaim it.
	107	*/
	108	disk = sis->bdev->bd_disk;
5df373e9 VM	109	entry.val = page_private(page);
5df373e9 VM	110	if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
3f2b1a04 MK	111	unsigned long offset;
3f2b1a04 MK	112
3f2b1a04 MK	113	offset = swp_offset(entry);
	114
	115	SetPageDirty(page);
	116	disk->fops->swap_slot_free_notify(sis->bdev,
	117	offset);
	118	}
	119	}
	120
4246a0b6	121	static void end_swap_bio_read(struct bio *bio)
1da177e4	122	{
263663cd	123	struct page *page = bio_first_page_all(bio);
23955622	124	struct task_struct *waiter = bio->bi_private;
1da177e4	125
4e4cbee9	126	if (bio->bi_status) {
1da177e4 LT	127	SetPageError(page);
1da177e4 LT	128	ClearPageUptodate(page);
1170532b	129	pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
74d46992	130	MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
1170532b	131	(unsigned long long)bio->bi_iter.bi_sector);
b430e9d1	132	goto out;
1da177e4	133	}
b430e9d1 MK	134
b430e9d1 MK	135	SetPageUptodate(page);
3f2b1a04	136	swap_slot_free_notify(page);
b430e9d1	137	out:
1da177e4	138	unlock_page(page);
23955622	139	WRITE_ONCE(bio->bi_private, NULL);
1da177e4	140	bio_put(bio);
87518530 ON	141	if (waiter) {
	142	blk_wake_io_task(waiter);
	143	put_task_struct(waiter);
	144	}
1da177e4 LT	145	}
1da177e4 LT	146
a509bc1a MG	147	int generic_swapfile_activate(struct swap_info_struct *sis,
	148	struct file *swap_file,
	149	sector_t *span)
	150	{
	151	struct address_space *mapping = swap_file->f_mapping;
	152	struct inode *inode = mapping->host;
	153	unsigned blocks_per_page;
	154	unsigned long page_no;
	155	unsigned blkbits;
	156	sector_t probe_block;
	157	sector_t last_block;
	158	sector_t lowest_block = -1;
	159	sector_t highest_block = 0;
	160	int nr_extents = 0;
	161	int ret;
	162
	163	blkbits = inode->i_blkbits;
	164	blocks_per_page = PAGE_SIZE >> blkbits;
	165
	166	/*
4efaceb1	167	* Map all the blocks into the extent tree. This code doesn't try
a509bc1a MG	168	* to be very smart.
	169	*/
	170	probe_block = 0;
	171	page_no = 0;
	172	last_block = i_size_read(inode) >> blkbits;
	173	while ((probe_block + blocks_per_page) <= last_block &&
	174	page_no < sis->max) {
	175	unsigned block_in_page;
	176	sector_t first_block;
	177
7e4411bf MP	178	cond_resched();
7e4411bf MP	179
a509bc1a MG	180	first_block = bmap(inode, probe_block);
	181	if (first_block == 0)
	182	goto bad_bmap;
	183
	184	/*
	185	* It must be PAGE_SIZE aligned on-disk
	186	*/
	187	if (first_block & (blocks_per_page - 1)) {
	188	probe_block++;
	189	goto reprobe;
	190	}
	191
	192	for (block_in_page = 1; block_in_page < blocks_per_page;
	193	block_in_page++) {
	194	sector_t block;
	195
	196	block = bmap(inode, probe_block + block_in_page);
	197	if (block == 0)
	198	goto bad_bmap;
	199	if (block != first_block + block_in_page) {
	200	/* Discontiguity */
	201	probe_block++;
	202	goto reprobe;
	203	}
	204	}
	205
	206	first_block >>= (PAGE_SHIFT - blkbits);
	207	if (page_no) { /* exclude the header page */
	208	if (first_block < lowest_block)
	209	lowest_block = first_block;
	210	if (first_block > highest_block)
	211	highest_block = first_block;
	212	}
	213
	214	/*
	215	* We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
	216	*/
	217	ret = add_swap_extent(sis, page_no, 1, first_block);
	218	if (ret < 0)
	219	goto out;
	220	nr_extents += ret;
	221	page_no++;
	222	probe_block += blocks_per_page;
	223	reprobe:
	224	continue;
	225	}
	226	ret = nr_extents;
	227	*span = 1 + highest_block - lowest_block;
	228	if (page_no == 0)
	229	page_no = 1; /* force Empty message */
	230	sis->max = page_no;
	231	sis->pages = page_no - 1;
	232	sis->highest_bit = page_no - 1;
	233	out:
	234	return ret;
	235	bad_bmap:
1170532b	236	pr_err("swapon: swapfile has holes\n");
a509bc1a MG	237	ret = -EINVAL;
	238	goto out;
	239	}
	240
1da177e4 LT	241	/*
	242	* We may have stale swap cache pages in memory: notice
	243	* them here and get rid of the unnecessary final write.
	244	*/
	245	int swap_writepage(struct page page, struct writeback_control wbc)
	246	{
2f772e6c	247	int ret = 0;
1da177e4	248
a2c43eed	249	if (try_to_free_swap(page)) {
1da177e4 LT	250	unlock_page(page);
	251	goto out;
	252	}
165c8aed	253	if (frontswap_store(page) == 0) {
38b5faf4 DM	254	set_page_writeback(page);
	255	unlock_page(page);
	256	end_page_writeback(page);
	257	goto out;
	258	}
1eec6702	259	ret = __swap_writepage(page, wbc, end_swap_bio_write);
2f772e6c SJ	260	out:
	261	return ret;
	262	}
	263
dd6bd0d9 MW	264	static sector_t swap_page_sector(struct page *page)
dd6bd0d9 MW	265	{
09cbfeaf	266	return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
dd6bd0d9 MW	267	}
dd6bd0d9 MW	268
225311a4 HY	269	static inline void count_swpout_vm_event(struct page *page)
	270	{
	271	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	272	if (unlikely(PageTransHuge(page)))
	273	count_vm_event(THP_SWPOUT);
	274	#endif
	275	count_vm_events(PSWPOUT, hpage_nr_pages(page));
	276	}
	277
1eec6702	278	int __swap_writepage(struct page page, struct writeback_control wbc,
4246a0b6	279	bio_end_io_t end_write_func)
2f772e6c SJ	280	{
2f772e6c SJ	281	struct bio *bio;
4e49ea4a	282	int ret;
2f772e6c	283	struct swap_info_struct *sis = page_swap_info(page);
62c230bc	284
cc30c5d6	285	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
bc4ae27d	286	if (sis->flags & SWP_FS) {
62c230bc MG	287	struct kiocb kiocb;
	288	struct file *swap_file = sis->swap_file;
	289	struct address_space *mapping = swap_file->f_mapping;
62a8067a AV	290	struct bio_vec bv = {
	291	.bv_page = page,
	292	.bv_len = PAGE_SIZE,
	293	.bv_offset = 0
	294	};
05afcb77	295	struct iov_iter from;
62c230bc	296
aa563d7b	297	iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
62c230bc MG	298	init_sync_kiocb(&kiocb, swap_file);
62c230bc MG	299	kiocb.ki_pos = page_file_offset(page);
62c230bc	300
0cdc444a	301	set_page_writeback(page);
62c230bc	302	unlock_page(page);
c8b8e32d	303	ret = mapping->a_ops->direct_IO(&kiocb, &from);
62c230bc MG	304	if (ret == PAGE_SIZE) {
	305	count_vm_event(PSWPOUT);
	306	ret = 0;
2d30d31e	307	} else {
0cdc444a MG	308	/*
	309	* In the case of swap-over-nfs, this can be a
	310	* temporary failure if the system has limited
	311	* memory for allocating transmit buffers.
	312	* Mark the page dirty and avoid
	313	* rotate_reclaimable_page but rate-limit the
	314	* messages but do not flag PageError like
	315	* the normal direct-to-bio case as it could
	316	* be temporary.
	317	*/
2d30d31e	318	set_page_dirty(page);
0cdc444a	319	ClearPageReclaim(page);
1170532b JP	320	pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
1170532b JP	321	page_file_offset(page));
62c230bc	322	}
0cdc444a	323	end_page_writeback(page);
62c230bc MG	324	return ret;
	325	}
	326
dd6bd0d9 MW	327	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
dd6bd0d9 MW	328	if (!ret) {
225311a4	329	count_swpout_vm_event(page);
dd6bd0d9 MW	330	return 0;
	331	}
	332
	333	ret = 0;
1eec6702	334	bio = get_swap_bio(GFP_NOIO, page, end_write_func);
1da177e4 LT	335	if (bio == NULL) {
	336	set_page_dirty(page);
	337	unlock_page(page);
	338	ret = -ENOMEM;
	339	goto out;
	340	}
0d1e0c7c	341	bio->bi_opf = REQ_OP_WRITE \| REQ_SWAP \| wbc_to_write_flags(wbc);
6a7f6d86	342	bio_associate_blkg_from_page(bio, page);
225311a4	343	count_swpout_vm_event(page);
1da177e4 LT	344	set_page_writeback(page);
1da177e4 LT	345	unlock_page(page);
4e49ea4a	346	submit_bio(bio);
1da177e4 LT	347	out:
	348	return ret;
	349	}
	350
0bcac06f	351	int swap_readpage(struct page *page, bool synchronous)
1da177e4 LT	352	{
	353	struct bio *bio;
	354	int ret = 0;
62c230bc	355	struct swap_info_struct *sis = page_swap_info(page);
23955622	356	blk_qc_t qc;
74d46992	357	struct gendisk *disk;
93779069	358	unsigned long pflags;
1da177e4	359
0bcac06f	360	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
309381fe SL	361	VM_BUG_ON_PAGE(!PageLocked(page), page);
309381fe SL	362	VM_BUG_ON_PAGE(PageUptodate(page), page);
93779069 MK	363
	364	/*
	365	* Count submission time as memory stall. When the device is congested,
	366	* or the submitting cgroup IO-throttled, submission can be a
	367	* significant part of overall IO time.
	368	*/
	369	psi_memstall_enter(&pflags);
	370
165c8aed	371	if (frontswap_load(page) == 0) {
38b5faf4 DM	372	SetPageUptodate(page);
	373	unlock_page(page);
	374	goto out;
	375	}
62c230bc	376
bc4ae27d	377	if (sis->flags & SWP_FS) {
62c230bc MG	378	struct file *swap_file = sis->swap_file;
	379	struct address_space *mapping = swap_file->f_mapping;
	380
	381	ret = mapping->a_ops->readpage(swap_file, page);
	382	if (!ret)
	383	count_vm_event(PSWPIN);
93779069	384	goto out;
62c230bc MG	385	}
62c230bc MG	386
dd6bd0d9 MW	387	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
dd6bd0d9 MW	388	if (!ret) {
b06bad17 MK	389	if (trylock_page(page)) {
	390	swap_slot_free_notify(page);
	391	unlock_page(page);
	392	}
	393
dd6bd0d9	394	count_vm_event(PSWPIN);
93779069	395	goto out;
dd6bd0d9 MW	396	}
	397
	398	ret = 0;
f29ad6a9	399	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
1da177e4 LT	400	if (bio == NULL) {
	401	unlock_page(page);
	402	ret = -ENOMEM;
	403	goto out;
	404	}
74d46992	405	disk = bio->bi_disk;
b0ba2d0f TH	406	/*
	407	* Keep this task valid during swap readpage because the oom killer may
	408	* attempt to access it in the page fault retry time check.
	409	*/
95fe6c1a	410	bio_set_op_attrs(bio, REQ_OP_READ, 0);
87518530	411	if (synchronous) {
b685a735	412	bio->bi_opf \|= REQ_HIPRI;
87518530 ON	413	get_task_struct(current);
	414	bio->bi_private = current;
	415	}
f8891e5e	416	count_vm_event(PSWPIN);
23955622 SL	417	bio_get(bio);
23955622 SL	418	qc = submit_bio(bio);
0bcac06f	419	while (synchronous) {
1ac5cd49	420	set_current_state(TASK_UNINTERRUPTIBLE);
23955622 SL	421	if (!READ_ONCE(bio->bi_private))
	422	break;
	423
0a1b8b87	424	if (!blk_poll(disk->queue, qc, true))
b685a735	425	io_schedule();
23955622 SL	426	}
	427	__set_current_state(TASK_RUNNING);
	428	bio_put(bio);
	429
1da177e4	430	out:
93779069	431	psi_memstall_leave(&pflags);
1da177e4 LT	432	return ret;
1da177e4 LT	433	}
62c230bc MG	434
	435	int swap_set_page_dirty(struct page *page)
	436	{
	437	struct swap_info_struct *sis = page_swap_info(page);
	438
bc4ae27d	439	if (sis->flags & SWP_FS) {
62c230bc	440	struct address_space *mapping = sis->swap_file->f_mapping;
cc30c5d6 AM	441
cc30c5d6 AM	442	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
62c230bc MG	443	return mapping->a_ops->set_page_dirty(page);
	444	} else {
	445	return __set_page_dirty_no_writeback(page);
	446	}
	447	}