[linux-block.git] / fs / verity / verify.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <crypto/hash.h>
#include <linux/bio.h>

static struct workqueue_struct *fsverity_read_workqueue;

/*
 * Compare an expected hash with a freshly computed one.
 *
 * Only the first vi->tree_params.digest_size bytes of each buffer are
 * compared.  @data_pos and @level are used purely for the error message
 * (callers in this file pass level == -1 when checking a data block).
 *
 * Return: 0 if the hashes are equal; otherwise an error is logged against
 *	   vi->inode and -EBADMSG is returned.
 */
static inline int cmp_hashes(const struct fsverity_info *vi,
			     const u8 *want_hash, const u8 *real_hash,
			     u64 data_pos, int level)
{
	const unsigned int digest_len = vi->tree_params.digest_size;

	if (memcmp(want_hash, real_hash, digest_len) != 0) {
		fsverity_err(vi->inode,
			     "FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
			     data_pos, level,
			     vi->tree_params.hash_alg->name, digest_len,
			     want_hash,
			     vi->tree_params.hash_alg->name, digest_len,
			     real_hash);
		return -EBADMSG;
	}

	return 0;
}

/*
 * Check that @len bytes starting at byte @offset of @page are all zero.
 *
 * The page is mapped only for the duration of the scan; the mapping is
 * dropped before any error is logged.
 *
 * Return: %true if every byte in the range is zero, else %false (after
 *	   logging an error against @inode).
 */
static bool data_is_zeroed(struct inode *inode, struct page *page,
			   unsigned int len, unsigned int offset)
{
	void *mapped = kmap_local_page(page);
	/* memchr_inv() returns non-NULL at the first byte that is not 0. */
	const bool all_zero = !memchr_inv(mapped + offset, 0, len);

	kunmap_local(mapped);

	if (!all_zero)
		fsverity_err(inode,
			     "FILE CORRUPTED!  Data past EOF is not zeroed");

	return all_zero;
}

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located in
 * @hpage, has already been verified.
 *
 * @vi: verity info of the file; supplies the optional ->hash_block_verified
 *	bitmap, the ->hash_page_init_lock spinlock, and the tree parameters
 * @hpage: page containing the hash block
 * @hblock_idx: index of the hash block in the tree overall
 *
 * As a side effect, when the bitmap is in use and @hpage is seen with
 * PG_checked clear, this function clears the bitmap bits of every hash block
 * in that page and then sets PG_checked on it (under ->hash_page_init_lock).
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
				   unsigned long hblock_idx)
{
	bool verified;
	unsigned int blocks_per_page;
	unsigned int i;

	/*
	 * When the Merkle tree block size and page size are the same, then the
	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	 * to directly indicate whether the page's block has been verified.
	 *
	 * Using PG_checked also guarantees that we re-verify hash pages that
	 * get evicted and re-instantiated from the backing storage, as new
	 * pages always start out with PG_checked cleared.
	 */
	if (!vi->hash_block_verified)
		return PageChecked(hpage);

	/*
	 * When the Merkle tree block size and page size differ, we use a bitmap
	 * to indicate whether each hash block has been verified.
	 *
	 * However, we still need to ensure that hash pages that get evicted and
	 * re-instantiated from the backing storage are re-verified.  To do
	 * this, we use PG_checked again, but now it doesn't really mean
	 * "checked".  Instead, now it just serves as an indicator for whether
	 * the hash page is newly instantiated or not.
	 *
	 * The first thread that sees PG_checked=0 must clear the corresponding
	 * bitmap bits, then set PG_checked=1.  This requires a spinlock.  To
	 * avoid having to take this spinlock in the common case of
	 * PG_checked=1, we start with an opportunistic lockless read.
	 */
	if (PageChecked(hpage)) {
		/*
		 * A read memory barrier is needed here to give ACQUIRE
		 * semantics to the above PageChecked() test.
		 */
		smp_rmb();
		return test_bit(hblock_idx, vi->hash_block_verified);
	}
	/* Slow path: PG_checked looked clear, so recheck it under the lock. */
	spin_lock(&vi->hash_page_init_lock);
	if (PageChecked(hpage)) {
		/* Another thread initialized this page's bits in the meantime. */
		verified = test_bit(hblock_idx, vi->hash_block_verified);
	} else {
		/*
		 * Newly instantiated page: reset the bits of all hash blocks
		 * that share it, starting from the first block in the page.
		 */
		blocks_per_page = vi->tree_params.blocks_per_page;
		hblock_idx = round_down(hblock_idx, blocks_per_page);
		for (i = 0; i < blocks_per_page; i++)
			clear_bit(hblock_idx + i, vi->hash_block_verified);
		/*
		 * A write memory barrier is needed here to give RELEASE
		 * semantics to the below SetPageChecked() operation.
		 */
		smp_wmb();
		SetPageChecked(hpage);
		/* The bits were just cleared, so the block is unverified. */
		verified = false;
	}
	spin_unlock(&vi->hash_page_init_lock);
	return verified;
}

/*
 * Verify a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node.  However,
 * for efficiency the filesystem may cache the hash blocks.  Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * @inode: the file's inode; supplies i_size and, through
 *	   i_sb->s_vop->read_merkle_tree_page(), the Merkle tree pages
 * @vi: verity info of the file (tree parameters, root hash, and the optional
 *	->hash_block_verified bitmap)
 * @req: hash request handed to fsverity_hash_block()
 * @data_page: page containing the data block
 * @data_pos: byte position of the data block in the file
 * @dblock_offset_in_page: byte offset of the data block within @data_page
 * @max_ra_pages: upper bound on the value passed as the last argument of
 *		  read_merkle_tree_page() at level 0 (further capped by the
 *		  number of tree pages remaining); all other levels pass 0
 *
 * Every hash page obtained from read_merkle_tree_page() is released with
 * put_page() before this function returns, on both success and failure.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool
verify_data_block(struct inode *inode, struct fsverity_info *vi,
		  struct ahash_request *req, struct page *data_page,
		  u64 data_pos, unsigned int dblock_offset_in_page,
		  unsigned long max_ra_pages)
{
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int hsize = params->digest_size;
	int level;
	/* Scratch copy of the wanted hash, taken out of a hash page */
	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* Points at either _want_hash or vi->root_hash */
	const u8 *want_hash;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* The hash blocks that are traversed, indexed by level */
	struct {
		/* Page containing the hash block */
		struct page *page;
		/* Index of the hash block in the tree overall */
		unsigned long index;
		/* Byte offset of the hash block within @page */
		unsigned int offset_in_page;
		/* Byte offset of the wanted hash within @page */
		unsigned int hoffset;
	} hblocks[FS_VERITY_MAX_LEVELS];
	/*
	 * The index of the previous level's block within that level; also the
	 * index of that block's hash within the current level.
	 */
	u64 hidx = data_pos >> params->log_blocksize;
	int err;

	if (unlikely(data_pos >= inode->i_size)) {
		/*
		 * This can happen in the data page spanning EOF when the Merkle
		 * tree block size is less than the page size.  The Merkle tree
		 * doesn't cover data blocks fully past EOF.  But the entire
		 * page spanning EOF can be visible to userspace via a mmap, and
		 * any part past EOF should be all zeroes.  Therefore, we need
		 * to verify that any data blocks fully past EOF are all zeroes.
		 */
		return data_is_zeroed(inode, data_page, params->block_size,
				      dblock_offset_in_page);
	}

	/*
	 * Starting at the leaf level, ascend the tree saving hash blocks along
	 * the way until we find a hash block that has already been verified, or
	 * until we reach the root.
	 *
	 * Invariant relied on by the "descend" and "out" loops below: on
	 * leaving this loop, hblocks[0..level-1] each hold a page reference
	 * that still has to be dropped.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long next_hidx;
		unsigned long hblock_idx;
		pgoff_t hpage_idx;
		unsigned int hblock_offset_in_page;
		unsigned int hoffset;
		struct page *hpage;

		/*
		 * The index of the block in the current level; also the index
		 * of that block's hash within the next level.
		 */
		next_hidx = hidx >> params->log_arity;

		/* Index of the hash block in the tree overall */
		hblock_idx = params->level_start[level] + next_hidx;

		/* Index of the hash page in the tree overall */
		hpage_idx = hblock_idx >> params->log_blocks_per_page;

		/* Byte offset of the hash block within the page */
		hblock_offset_in_page =
			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;

		/* Byte offset of the hash within the page */
		hoffset = hblock_offset_in_page +
			  ((hidx << params->log_digestsize) &
			   (params->block_size - 1));

		/* Only the leaf level (level 0) passes a nonzero last argument. */
		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
				hpage_idx, level == 0 ? min(max_ra_pages,
					params->tree_pages - hpage_idx) : 0);
		if (IS_ERR(hpage)) {
			err = PTR_ERR(hpage);
			fsverity_err(inode,
				     "Error %d reading Merkle tree page %lu",
				     err, hpage_idx);
			goto out;
		}
		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
			/*
			 * This block is already trusted: take the wanted hash
			 * from it, drop its page right away (it is not saved
			 * in hblocks[]), and verify the levels below it.
			 */
			memcpy_from_page(_want_hash, hpage, hoffset, hsize);
			want_hash = _want_hash;
			put_page(hpage);
			goto descend;
		}
		hblocks[level].page = hpage;
		hblocks[level].index = hblock_idx;
		hblocks[level].offset_in_page = hblock_offset_in_page;
		hblocks[level].hoffset = hoffset;
		hidx = next_hidx;
	}

	/* No verified hash block was found; check against the root hash. */
	want_hash = vi->root_hash;
descend:
	/* Descend the tree verifying hash blocks. */
	for (; level > 0; level--) {
		struct page *hpage = hblocks[level - 1].page;
		unsigned long hblock_idx = hblocks[level - 1].index;
		unsigned int hblock_offset_in_page =
			hblocks[level - 1].offset_in_page;
		unsigned int hoffset = hblocks[level - 1].hoffset;

		err = fsverity_hash_block(params, inode, req, hpage,
					  hblock_offset_in_page, real_hash);
		if (err)
			goto out;
		err = cmp_hashes(vi, want_hash, real_hash, data_pos, level - 1);
		if (err)
			goto out;
		/*
		 * Mark the hash block as verified.  This must be atomic and
		 * idempotent, as the same hash block might be verified by
		 * multiple threads concurrently.
		 */
		if (vi->hash_block_verified)
			set_bit(hblock_idx, vi->hash_block_verified);
		else
			SetPageChecked(hpage);
		/* The hash stored here is what the next lower block must match. */
		memcpy_from_page(_want_hash, hpage, hoffset, hsize);
		want_hash = _want_hash;
		put_page(hpage);
	}

	/* Finally, verify the data block. */
	err = fsverity_hash_block(params, inode, req, data_page,
				  dblock_offset_in_page, real_hash);
	if (err)
		goto out;
	/* A level of -1 in the log message denotes the data block itself. */
	err = cmp_hashes(vi, want_hash, real_hash, data_pos, -1);
out:
	/*
	 * Drop any hash pages still held.  After a fully successful descent
	 * level is 0 and this loop does nothing.
	 */
	for (; level > 0; level--)
		put_page(hblocks[level - 1].page);

	return err == 0;
}

/*
 * Verify a block-aligned byte range of a folio, one Merkle tree block at a
 * time.
 *
 * @len and @offset describe the range within @data_folio.  The range must be
 * non-empty and aligned to the Merkle tree block size, and the folio must be
 * locked and not yet uptodate; otherwise a one-time warning is emitted and
 * %false is returned without verifying anything.  @max_ra_pages is passed
 * through unchanged to verify_data_block().
 *
 * Verification stops at the first block that fails.
 *
 * Return: %true if every block in the range is valid, else %false.
 */
static bool
verify_data_blocks(struct inode *inode, struct fsverity_info *vi,
		   struct ahash_request *req, struct folio *data_folio,
		   size_t len, size_t offset, unsigned long max_ra_pages)
{
	const unsigned int block_size = vi->tree_params.block_size;
	/* File position of the first byte of the folio */
	const u64 folio_pos = (u64)data_folio->index << PAGE_SHIFT;

	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
		return false;
	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
			 folio_test_uptodate(data_folio)))
		return false;

	/* len is a nonzero multiple of block_size here, so this terminates. */
	for (; len; len -= block_size, offset += block_size) {
		struct page *data_page =
			folio_page(data_folio, offset >> PAGE_SHIFT);
		const unsigned int offset_in_page = offset & ~PAGE_MASK;

		if (!verify_data_block(inode, vi, req, data_page,
				       folio_pos + offset, offset_in_page,
				       max_ra_pages))
			return false;
	}

	return true;
}

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file.  The folio must be
 * a pagecache folio that is still locked and has not yet been marked
 * uptodate, and both @len and @offset must be aligned to the Merkle tree
 * block size.  No Merkle tree readahead is requested on this path.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
	struct inode *host = folio->mapping->host;
	struct fsverity_info *info = host->i_verity_info;
	struct ahash_request *hash_req;
	bool ok;

	/* Mempool-backed, so this allocation never fails. */
	hash_req = fsverity_alloc_hash_request(info->tree_params.hash_alg,
					       GFP_NOFS);
	ok = verify_data_blocks(host, info, hash_req, folio, len, offset, 0);
	fsverity_free_hash_request(info->tree_params.hash_alg, hash_req);

	return ok;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree.  All bio data segments
 * must be aligned to the file's Merkle tree block size.  If any data fails
 * verification, then bio->bi_status is set to an error status and the
 * remaining folios of the bio are not checked.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache.  Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_page_all(bio)->mapping->host;
	struct fsverity_info *vi = inode->i_verity_info;
	/*
	 * If this bio is for data readahead, then we also do readahead of the
	 * first (largest) level of the Merkle tree.  Namely, when a Merkle
	 * tree page is read, we also try to piggy-back on some additional
	 * pages -- up to 1/4 the number of data pages.
	 *
	 * This improves sequential read performance, as it greatly reduces
	 * the number of I/O requests made to the Merkle tree.
	 */
	const unsigned long max_ra_pages = (bio->bi_opf & REQ_RAHEAD) ?
		bio->bi_iter.bi_size >> (PAGE_SHIFT + 2) : 0;
	struct ahash_request *req;
	struct folio_iter fi;

	/* This allocation never fails, since it's mempool-backed. */
	req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);

	bio_for_each_folio_all(fi, bio) {
		if (verify_data_blocks(inode, vi, req, fi.folio, fi.length,
				       fi.offset, max_ra_pages))
			continue;

		/* First failure: flag the whole bio and stop. */
		bio->bi_status = BLK_STS_IOERR;
		break;
	}

	fsverity_free_hash_request(vi->tree_params.hash_alg, req);
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Hand @work to the fs-verity read workqueue so that verification runs
 * asynchronously.  The result of queue_work() is not reported to the caller.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
	queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);

/*
 * Create the workqueue used for fs-verity read verification.
 *
 * Return: 0 on success, or -ENOMEM if the workqueue could not be allocated.
 */
int __init fsverity_init_workqueue(void)
{
	struct workqueue_struct *wq;

	/*
	 * Verification work blocks reads from completing, so give it a
	 * high-priority workqueue to run ahead of regular application tasks.
	 *
	 * The workqueue is deliberately not unbound: using an unbound
	 * workqueue for crypto operations causes excessive scheduler latency
	 * on ARM64.
	 */
	wq = alloc_workqueue("fsverity_read_queue", WQ_HIGHPRI,
			     num_online_cpus());
	fsverity_read_workqueue = wq;

	return wq ? 0 : -ENOMEM;
}

/*
 * Destroy the fs-verity read workqueue and clear the global pointer to it.
 */
void __init fsverity_exit_workqueue(void)
{
	struct workqueue_struct *wq = fsverity_read_workqueue;

	destroy_workqueue(wq);
	fsverity_read_workqueue = NULL;
}
Commit	Line	Data
8a1d0f9c EB	1	// SPDX-License-Identifier: GPL-2.0
8a1d0f9c EB	2	/*
704528d8	3	* Data verification functions, i.e. hooks for ->readahead()
8a1d0f9c EB	4	*
	5	* Copyright 2019 Google LLC
	6	*/
	7
	8	#include "fsverity_private.h"
	9
	10	#include <crypto/hash.h>
	11	#include <linux/bio.h>
8a1d0f9c EB	12
	13	static struct workqueue_struct *fsverity_read_workqueue;
	14
8a1d0f9c EB	15	static inline int cmp_hashes(const struct fsverity_info *vi,
8a1d0f9c EB	16	const u8 want_hash, const u8 real_hash,
5306892a	17	u64 data_pos, int level)
8a1d0f9c EB	18	{
	19	const unsigned int hsize = vi->tree_params.digest_size;
	20
	21	if (memcmp(want_hash, real_hash, hsize) == 0)
	22	return 0;
	23
	24	fsverity_err(vi->inode,
5306892a EB	25	"FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%phN, real_hash=%s:%phN",
5306892a EB	26	data_pos, level,
8a1d0f9c EB	27	vi->tree_params.hash_alg->name, hsize, want_hash,
	28	vi->tree_params.hash_alg->name, hsize, real_hash);
	29	return -EBADMSG;
	30	}
	31
5306892a EB	32	static bool data_is_zeroed(struct inode inode, struct page page,
	33	unsigned int len, unsigned int offset)
	34	{
	35	void *virt = kmap_local_page(page);
	36
	37	if (memchr_inv(virt + offset, 0, len)) {
	38	kunmap_local(virt);
	39	fsverity_err(inode,
	40	"FILE CORRUPTED! Data past EOF is not zeroed");
	41	return false;
	42	}
	43	kunmap_local(virt);
	44	return true;
	45	}
	46
	47	/*
	48	* Returns true if the hash block with index @hblock_idx in the tree, located in
	49	* @hpage, has already been verified.
	50	*/
	51	static bool is_hash_block_verified(struct fsverity_info vi, struct page hpage,
	52	unsigned long hblock_idx)
	53	{
	54	bool verified;
	55	unsigned int blocks_per_page;
	56	unsigned int i;
	57
	58	/*
	59	* When the Merkle tree block size and page size are the same, then the
	60	* ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	61	* to directly indicate whether the page's block has been verified.
	62	*
	63	* Using PG_checked also guarantees that we re-verify hash pages that
	64	* get evicted and re-instantiated from the backing storage, as new
	65	* pages always start out with PG_checked cleared.
	66	*/
	67	if (!vi->hash_block_verified)
	68	return PageChecked(hpage);
	69
	70	/*
	71	* When the Merkle tree block size and page size differ, we use a bitmap
	72	* to indicate whether each hash block has been verified.
	73	*
	74	* However, we still need to ensure that hash pages that get evicted and
	75	* re-instantiated from the backing storage are re-verified. To do
	76	* this, we use PG_checked again, but now it doesn't really mean
	77	* "checked". Instead, now it just serves as an indicator for whether
	78	* the hash page is newly instantiated or not.
	79	*
	80	* The first thread that sees PG_checked=0 must clear the corresponding
	81	* bitmap bits, then set PG_checked=1. This requires a spinlock. To
	82	* avoid having to take this spinlock in the common case of
	83	* PG_checked=1, we start with an opportunistic lockless read.
	84	*/
	85	if (PageChecked(hpage)) {
	86	/*
	87	* A read memory barrier is needed here to give ACQUIRE
	88	* semantics to the above PageChecked() test.
	89	*/
	90	smp_rmb();
	91	return test_bit(hblock_idx, vi->hash_block_verified);
	92	}
	93	spin_lock(&vi->hash_page_init_lock);
	94	if (PageChecked(hpage)) {
	95	verified = test_bit(hblock_idx, vi->hash_block_verified);
96	} else {
97	blocks_per_page = vi->tree_params.blocks_per_page;
98	hblock_idx = round_down(hblock_idx, blocks_per_page);
99	for (i = 0; i < blocks_per_page; i++)
100	clear_bit(hblock_idx + i, vi->hash_block_verified);
101	/*
102	* A write memory barrier is needed here to give RELEASE
103	* semantics to the below SetPageChecked() operation.
104	*/
105	smp_wmb();
106	SetPageChecked(hpage);
107	verified = false;
108	}
109	spin_unlock(&vi->hash_page_init_lock);
110	return verified;
111	}
112
8a1d0f9c	113	/*
5306892a	114	* Verify a single data block against the file's Merkle tree.
8a1d0f9c EB	115	*
8a1d0f9c EB	116	* In principle, we need to verify the entire path to the root node. However,
5306892a EB	117	* for efficiency the filesystem may cache the hash blocks. Therefore we need
	118	* only ascend the tree until an already-verified hash block is seen, and then
	119	* verify the path to that block.
8a1d0f9c	120	*
5306892a	121	* Return: %true if the data block is valid, else %false.
8a1d0f9c	122	*/
5306892a EB	123	static bool
	124	verify_data_block(struct inode inode, struct fsverity_info vi,
	125	struct ahash_request req, struct page data_page,
	126	u64 data_pos, unsigned int dblock_offset_in_page,
	127	unsigned long max_ra_pages)
8a1d0f9c EB	128	{
	129	const struct merkle_tree_params *params = &vi->tree_params;
	130	const unsigned int hsize = params->digest_size;
8a1d0f9c EB	131	int level;
	132	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	133	const u8 *want_hash;
	134	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
5306892a EB	135	/* The hash blocks that are traversed, indexed by level */
	136	struct {
	137	/* Page containing the hash block */
	138	struct page *page;
	139	/* Index of the hash block in the tree overall */
	140	unsigned long index;
	141	/* Byte offset of the hash block within @page */
	142	unsigned int offset_in_page;
	143	/* Byte offset of the wanted hash within @page */
	144	unsigned int hoffset;
	145	} hblocks[FS_VERITY_MAX_LEVELS];
	146	/*
	147	* The index of the previous level's block within that level; also the
	148	* index of that block's hash within the current level.
	149	*/
	150	u64 hidx = data_pos >> params->log_blocksize;
8a1d0f9c EB	151	int err;
8a1d0f9c EB	152
5306892a EB	153	if (unlikely(data_pos >= inode->i_size)) {
	154	/*
	155	* This can happen in the data page spanning EOF when the Merkle
	156	* tree block size is less than the page size. The Merkle tree
	157	* doesn't cover data blocks fully past EOF. But the entire
	158	* page spanning EOF can be visible to userspace via a mmap, and
	159	* any part past EOF should be all zeroes. Therefore, we need
	160	* to verify that any data blocks fully past EOF are all zeroes.
	161	*/
	162	return data_is_zeroed(inode, data_page, params->block_size,
	163	dblock_offset_in_page);
	164	}
8a1d0f9c	165
8a1d0f9c	166	/*
5306892a EB	167	* Starting at the leaf level, ascend the tree saving hash blocks along
	168	* the way until we find a hash block that has already been verified, or
	169	* until we reach the root.
8a1d0f9c EB	170	*/
8a1d0f9c EB	171	for (level = 0; level < params->num_levels; level++) {
5306892a EB	172	unsigned long next_hidx;
	173	unsigned long hblock_idx;
	174	pgoff_t hpage_idx;
	175	unsigned int hblock_offset_in_page;
8a1d0f9c EB	176	unsigned int hoffset;
	177	struct page *hpage;
	178
5306892a EB	179	/*
	180	* The index of the block in the current level; also the index
	181	* of that block's hash within the next level.
	182	*/
	183	next_hidx = hidx >> params->log_arity;
	184
	185	/* Index of the hash block in the tree overall */
	186	hblock_idx = params->level_start[level] + next_hidx;
	187
	188	/* Index of the hash page in the tree overall */
	189	hpage_idx = hblock_idx >> params->log_blocks_per_page;
	190
	191	/* Byte offset of the hash block within the page */
	192	hblock_offset_in_page =
	193	(hblock_idx << params->log_blocksize) & ~PAGE_MASK;
	194
	195	/* Byte offset of the hash within the page */
	196	hoffset = hblock_offset_in_page +
	197	((hidx << params->log_digestsize) &
	198	(params->block_size - 1));
8a1d0f9c	199
5306892a EB	200	hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
	201	hpage_idx, level == 0 ? min(max_ra_pages,
	202	params->tree_pages - hpage_idx) : 0);
8a1d0f9c EB	203	if (IS_ERR(hpage)) {
	204	err = PTR_ERR(hpage);
	205	fsverity_err(inode,
	206	"Error %d reading Merkle tree page %lu",
5306892a	207	err, hpage_idx);
8a1d0f9c EB	208	goto out;
8a1d0f9c EB	209	}
5306892a	210	if (is_hash_block_verified(vi, hpage, hblock_idx)) {
c987918a	211	memcpy_from_page(_want_hash, hpage, hoffset, hsize);
8a1d0f9c EB	212	want_hash = _want_hash;
8a1d0f9c EB	213	put_page(hpage);
8a1d0f9c EB	214	goto descend;
8a1d0f9c EB	215	}
5306892a EB	216	hblocks[level].page = hpage;
	217	hblocks[level].index = hblock_idx;
	218	hblocks[level].offset_in_page = hblock_offset_in_page;
	219	hblocks[level].hoffset = hoffset;
	220	hidx = next_hidx;
8a1d0f9c EB	221	}
	222
	223	want_hash = vi->root_hash;
8a1d0f9c	224	descend:
f45555bf	225	/* Descend the tree verifying hash blocks. */
8a1d0f9c	226	for (; level > 0; level--) {
5306892a EB	227	struct page *hpage = hblocks[level - 1].page;
	228	unsigned long hblock_idx = hblocks[level - 1].index;
	229	unsigned int hblock_offset_in_page =
	230	hblocks[level - 1].offset_in_page;
	231	unsigned int hoffset = hblocks[level - 1].hoffset;
8a1d0f9c	232
5306892a EB	233	err = fsverity_hash_block(params, inode, req, hpage,
5306892a EB	234	hblock_offset_in_page, real_hash);
8a1d0f9c EB	235	if (err)
8a1d0f9c EB	236	goto out;
5306892a	237	err = cmp_hashes(vi, want_hash, real_hash, data_pos, level - 1);
8a1d0f9c EB	238	if (err)
8a1d0f9c EB	239	goto out;
5306892a EB	240	/*
	241	* Mark the hash block as verified. This must be atomic and
	242	* idempotent, as the same hash block might be verified by
	243	* multiple threads concurrently.
	244	*/
	245	if (vi->hash_block_verified)
	246	set_bit(hblock_idx, vi->hash_block_verified);
	247	else
	248	SetPageChecked(hpage);
c987918a	249	memcpy_from_page(_want_hash, hpage, hoffset, hsize);
8a1d0f9c EB	250	want_hash = _want_hash;
8a1d0f9c EB	251	put_page(hpage);
8a1d0f9c EB	252	}
8a1d0f9c EB	253
f45555bf	254	/* Finally, verify the data block. */
5306892a EB	255	err = fsverity_hash_block(params, inode, req, data_page,
5306892a EB	256	dblock_offset_in_page, real_hash);
8a1d0f9c EB	257	if (err)
8a1d0f9c EB	258	goto out;
5306892a	259	err = cmp_hashes(vi, want_hash, real_hash, data_pos, -1);
8a1d0f9c EB	260	out:
8a1d0f9c EB	261	for (; level > 0; level--)
5306892a	262	put_page(hblocks[level - 1].page);
8a1d0f9c EB	263
	264	return err == 0;
	265	}
	266
5306892a EB	267	static bool
5306892a EB	268	verify_data_blocks(struct inode inode, struct fsverity_info vi,
5d0f0e57 EB	269	struct ahash_request req, struct folio data_folio,
5d0f0e57 EB	270	size_t len, size_t offset, unsigned long max_ra_pages)
5306892a EB	271	{
5306892a EB	272	const unsigned int block_size = vi->tree_params.block_size;
5d0f0e57	273	u64 pos = (u64)data_folio->index << PAGE_SHIFT;
5306892a EB	274
	275	if (WARN_ON_ONCE(len <= 0 \|\| !IS_ALIGNED(len \| offset, block_size)))
	276	return false;
5d0f0e57 EB	277	if (WARN_ON_ONCE(!folio_test_locked(data_folio) \|\|
5d0f0e57 EB	278	folio_test_uptodate(data_folio)))
5306892a EB	279	return false;
5306892a EB	280	do {
5d0f0e57 EB	281	struct page *data_page =
	282	folio_page(data_folio, offset >> PAGE_SHIFT);
	283
	284	if (!verify_data_block(inode, vi, req, data_page, pos + offset,
	285	offset & ~PAGE_MASK, max_ra_pages))
5306892a EB	286	return false;
	287	offset += block_size;
	288	len -= block_size;
	289	} while (len);
	290	return true;
	291	}
	292
8a1d0f9c	293	/**
5d0f0e57 EB	294	* fsverity_verify_blocks() - verify data in a folio
	295	* @folio: the folio containing the data to verify
	296	* @len: the length of the data to verify in the folio
	297	* @offset: the offset of the data to verify in the folio
8a1d0f9c	298	*
5306892a	299	* Verify data that has just been read from a verity file. The data must be
5d0f0e57	300	* located in a pagecache folio that is still locked and not yet uptodate. The
5306892a	301	* length and offset of the data must be Merkle tree block size aligned.
8a1d0f9c	302	*
5306892a	303	* Return: %true if the data is valid, else %false.
8a1d0f9c	304	*/
5d0f0e57	305	bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
8a1d0f9c	306	{
5d0f0e57	307	struct inode *inode = folio->mapping->host;
5306892a	308	struct fsverity_info *vi = inode->i_verity_info;
8a1d0f9c EB	309	struct ahash_request *req;
	310	bool valid;
	311
439bea10 EB	312	/* This allocation never fails, since it's mempool-backed. */
439bea10 EB	313	req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
8a1d0f9c	314
5d0f0e57	315	valid = verify_data_blocks(inode, vi, req, folio, len, offset, 0);
8a1d0f9c	316
439bea10	317	fsverity_free_hash_request(vi->tree_params.hash_alg, req);
8a1d0f9c EB	318
	319	return valid;
	320	}
5306892a	321	EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
8a1d0f9c EB	322
	323	#ifdef CONFIG_BLOCK
	324	/**
	325	* fsverity_verify_bio() - verify a 'read' bio that has just completed
6377a38b	326	* @bio: the bio to verify
8a1d0f9c	327	*
5306892a EB	328	* Verify the bio's data against the file's Merkle tree. All bio data segments
	329	* must be aligned to the file's Merkle tree block size. If any data fails
	330	* verification, then bio->bi_status is set to an error status.
8a1d0f9c	331	*
704528d8	332	* This is a helper function for use by the ->readahead() method of filesystems
8a1d0f9c EB	333	* that issue bios to read data directly into the page cache. Filesystems that
	334	* populate the page cache without issuing bios (e.g. non block-based
	335	* filesystems) must instead call fsverity_verify_page() directly on each page.
	336	* All filesystems must also call fsverity_verify_page() on holes.
	337	*/
	338	void fsverity_verify_bio(struct bio *bio)
	339	{
	340	struct inode *inode = bio_first_page_all(bio)->mapping->host;
5306892a	341	struct fsverity_info *vi = inode->i_verity_info;
8a1d0f9c	342	struct ahash_request *req;
5d0f0e57	343	struct folio_iter fi;
fd39073d	344	unsigned long max_ra_pages = 0;
8a1d0f9c	345
439bea10	346	/* This allocation never fails, since it's mempool-backed. */
9098f36b	347	req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS);
8a1d0f9c	348
fd39073d EB	349	if (bio->bi_opf & REQ_RAHEAD) {
	350	/*
	351	* If this bio is for data readahead, then we also do readahead
	352	* of the first (largest) level of the Merkle tree. Namely,
	353	* when a Merkle tree page is read, we also try to piggy-back on
	354	* some additional pages -- up to 1/4 the number of data pages.
	355	*
	356	* This improves sequential read performance, as it greatly
	357	* reduces the number of I/O requests made to the Merkle tree.
	358	*/
9098f36b	359	max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
fd39073d EB	360	}
fd39073d EB	361
5d0f0e57 EB	362	bio_for_each_folio_all(fi, bio) {
	363	if (!verify_data_blocks(inode, vi, req, fi.folio, fi.length,
	364	fi.offset, max_ra_pages)) {
98dc08ba EB	365	bio->bi_status = BLK_STS_IOERR;
	366	break;
	367	}
8a1d0f9c EB	368	}
8a1d0f9c EB	369
9098f36b	370	fsverity_free_hash_request(vi->tree_params.hash_alg, req);
8a1d0f9c EB	371	}
	372	EXPORT_SYMBOL_GPL(fsverity_verify_bio);
	373	#endif /* CONFIG_BLOCK */
	374
	375	/**
	376	* fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
6377a38b	377	* @work: the work to enqueue
8a1d0f9c EB	378	*
	379	* Enqueue verification work for asynchronous processing.
	380	*/
	381	void fsverity_enqueue_verify_work(struct work_struct *work)
	382	{
	383	queue_work(fsverity_read_workqueue, work);
	384	}
	385	EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
	386
	387	int __init fsverity_init_workqueue(void)
	388	{
	389	/*
f959325e NH	390	* Use a high-priority workqueue to prioritize verification work, which
f959325e NH	391	* blocks reads from completing, over regular application tasks.
8a1d0f9c	392	*
f959325e NH	393	* For performance reasons, don't use an unbound workqueue. Using an
	394	* unbound workqueue for crypto operations causes excessive scheduler
	395	* latency on ARM64.
8a1d0f9c EB	396	*/
8a1d0f9c EB	397	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
f959325e	398	WQ_HIGHPRI,
8a1d0f9c EB	399	num_online_cpus());
	400	if (!fsverity_read_workqueue)
	401	return -ENOMEM;
	402	return 0;
	403	}
432434c9 EB	404
	405	void __init fsverity_exit_workqueue(void)
	406	{
	407	destroy_workqueue(fsverity_read_workqueue);
	408	fsverity_read_workqueue = NULL;
	409	}