[linux-2.6-block.git] / drivers / md / raid10.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RAID10_H
#define _RAID10_H

/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */

/*
 * Element type of r10conf.mirrors.
 * The rules for safely reading .rdev are given in the note above.
 */
struct raid10_info {
	struct md_rdev	*rdev;
	struct md_rdev	*replacement;
	/* NOTE(review): meaning not stated in this header - confirm in raid10.c */
	sector_t	head_position;
	/*
	 * Equal to mddev->recovery_disabled when recovery of this
	 * device should not be attempted.
	 */
	int		recovery_disabled;
};

/*
 * RAID10 configuration and runtime state: the mirrors array, the array
 * geometry (two instances, 'prev' and 'geo'), reshape bookkeeping, the
 * resync barrier counters and the memory pools.
 * NOTE(review): presumably one instance per mddev; locking rules for the
 * individual fields are not visible in this header - confirm in raid10.c.
 */
struct r10conf {
	struct mddev		*mddev;
	struct raid10_info	*mirrors;
	/* NOTE(review): presumably alternate mirrors arrays used while a
	 * reshape changes the device count - confirm.
	 */
	struct raid10_info	*mirrors_new, *mirrors_old;
	spinlock_t		device_lock;

	/* geometry */
	struct geom {
		int		raid_disks;
		int		near_copies;  /* number of copies laid out
					       * raid0 style */
		int		far_copies;   /* number of copies laid out
					       * at large strides across drives
					       */
		int		far_offset;   /* far_copies are offset by 1
					       * stripe instead of many
					       */
		sector_t	stride;	      /* distance between far copies.
					       * This is size / far_copies unless
					       * far_offset, in which case it is
					       * 1 stripe.
					       */
		int             far_set_size; /* The number of devices in a set,
					       * where a 'set' is a group of
					       * devices that contain far/offset
					       * copies of each other.
					       */
		int		chunk_shift; /* shift from chunks to sectors */
		sector_t	chunk_mask;  /* NOTE(review): presumably chunk
					      * size in sectors minus 1 - confirm
					      */
	} prev, geo;		      /* NOTE(review): presumably 'prev' is the
				       * geometry before a reshape and 'geo'
				       * the current one - confirm
				       */
	int			copies;	      /* near_copies * far_copies.
					       * must be <= raid_disks
					       */

	sector_t		dev_sectors;  /* temp copy of
					       * mddev->dev_sectors */
	/* Reshape bookkeeping.
	 * NOTE(review): the exact meaning of each field is not stated in
	 * this header - confirm in raid10.c.
	 */
	sector_t		reshape_progress;
	sector_t		reshape_safe;
	unsigned long		reshape_checkpoint;
	sector_t		offset_diff;

	/* NOTE(review): presumably links r10bio.retry_list entries - confirm */
	struct list_head	retry_list;
	/* A separate list of r10bio which just need raid_end_bio_io called.
	 * This mustn't happen for writes which had any errors if the superblock
	 * needs to be written.
	 * NOTE(review): this comment previously said "r1bio"; presumably a
	 * carry-over from raid1 - confirm.
	 */
	struct list_head	bio_end_io_list;

	/* queue pending writes and submit them on unplug */
	struct bio_list		pending_bio_list;

	/* Resync barrier state.
	 * NOTE(review): presumably these counters together implement the
	 * barrier that waiters sleep on via wait_barrier - confirm.
	 */
	seqlock_t		resync_lock;
	atomic_t		nr_pending;
	int			nr_waiting;
	int			nr_queued;
	int			barrier;
	int			array_freeze_pending;
	sector_t		next_resync;
	int			fullsync;  /* set to 1 if a full sync is needed,
					    * (fresh device added).
					    * Cleared when a sync completes.
					    */
	int			have_replacement; /* There is at least one
						   * replacement device.
						   */
	wait_queue_head_t	wait_barrier;

	mempool_t		r10bio_pool;
	mempool_t		r10buf_pool;
	struct page		*tmppage;
	struct bio_set		bio_split;

	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread __rcu	*thread;

	/*
	 * Keep track of cluster resync window to send to other nodes.
	 */
	sector_t		cluster_sync_low;
	sector_t		cluster_sync_high;
};

/*
 * this is our 'private' RAID10 bio.
 *
 * it contains information about what kind of IO operations were started
 * for this RAID10 operation, and about their status:
 */

struct r10bio {
	atomic_t		remaining; /* 'have we finished' count,
					    * used from IRQ handlers
					    */
	sector_t		sector;	/* virtual sector number */
	int			sectors; /* NOTE(review): presumably the length
					  * of this request in sectors - confirm
					  */
	unsigned long		state;	/* bits from enum r10bio_state */
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio		*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 * NOTE(review): presumably an index into devs[]; the union comment
	 * below implies it is >= 0 for reads - confirm.
	 */
	int			read_slot;

	/* NOTE(review): presumably links this r10bio onto one of the
	 * r10conf lists - confirm.
	 */
	struct list_head	retry_list;
	/*
	 * if the IO is in WRITE direction, then multiple bios are used,
	 * one for each copy.
	 * When resyncing we also use one for each copy.
	 * When reconstructing, we use 2 bios, one for read, one for write.
	 * We choose the number when they are allocated.
	 * We sometimes need an extra bio to write to the replacement.
	 *
	 * devs[] is a flexible array member, so it must stay the last
	 * member of this struct.
	 */
	struct r10dev {
		struct bio	*bio;
		/* Which union member is valid depends on the kind of IO. */
		union {
			struct bio	*repl_bio; /* used for resync and
						    * writes */
			struct md_rdev	*rdev;	   /* used for reads
						    * (read_slot >= 0) */
		};
		/* NOTE(review): addr and devnum are undocumented here;
		 * presumably the sector on the device and the device's
		 * index - confirm.
		 */
		sector_t	addr;
		int		devnum;
	} devs[];
};

/*
 * Bit numbers used in r10bio.state.
 */
enum r10bio_state {
	R10BIO_Uptodate = 0,
	R10BIO_IsSync,
	R10BIO_IsRecover,
	R10BIO_IsReshape,
	R10BIO_Degraded,
	/*
	 * Marks a bio that experienced a read error, so that raid10d
	 * knows what to do with it.
	 */
	R10BIO_ReadError,
	/*
	 * Set when a write for this request means some known-bad-block
	 * records can be cleared.
	 */
	R10BIO_MadeGood,
	R10BIO_WriteError,
	/*
	 * Set when, during a reshape, the IO is being performed on the
	 * 'previous' part of the array.
	 */
	R10BIO_Previous,
	/* Failfast devices did receive failfast requests. */
	R10BIO_FailFast,
	R10BIO_Discard,
};
#endif
Commit	Line	Data
b2441318	1	/* SPDX-License-Identifier: GPL-2.0 */
1da177e4 LT	2	#ifndef _RAID10_H
	3	#define _RAID10_H
	4
f2785b52 N	5	/* Note: raid10_info.rdev can be set to NULL asynchronously by
	6	* raid10_remove_disk.
	7	* There are three safe ways to access raid10_info.rdev.
	8	* 1/ when holding mddev->reconfig_mutex
	9	* 2/ when resync/recovery/reshape is known to be happening - i.e. in code
	10	* that is called as part of performing resync/recovery/reshape.
	11	* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
	12	* and if it is non-NULL, increment rdev->nr_pending before dropping the
	13	* RCU lock.
	14	* When .rdev is set to NULL, the nr_pending count checked again and if it has
	15	* been incremented, the pointer is put back in .rdev.
	16	*/
	17
dc280d98	18	struct raid10_info {
69335ef3	19	struct md_rdev *rdev, *replacement;
1da177e4	20	sector_t head_position;
2bb77736 N	21	int recovery_disabled; /* matches
	22	* mddev->recovery_disabled
	23	* when we shouldn't try
	24	* recovering this device.
	25	*/
1da177e4 LT	26	};
1da177e4 LT	27
e879a879	28	struct r10conf {
fd01b88c	29	struct mddev *mddev;
dc280d98 JB	30	struct raid10_info *mirrors;
dc280d98 JB	31	struct raid10_info *mirrors_new, *mirrors_old;
1da177e4 LT	32	spinlock_t device_lock;
	33
	34	/* geometry */
5cf00fcd N	35	struct geom {
	36	int raid_disks;
	37	int near_copies; /* number of copies laid out
69335ef3	38	* raid0 style */
5cf00fcd	39	int far_copies; /* number of copies laid out
1da177e4 LT	40	* at large strides across drives
1da177e4 LT	41	*/
5cf00fcd	42	int far_offset; /* far_copies are offset by 1
69335ef3	43	* stripe instead of many
c93983bf	44	*/
5cf00fcd	45	sector_t stride; /* distance between far copies.
c93983bf N	46	* This is size / far_copies unless
	47	* far_offset, in which case it is
	48	* 1 stripe.
1da177e4	49	*/
475901af JB	50	int far_set_size; /* The number of devices in a set,
	51	* where a 'set' are devices that
	52	* contain far/offset copies of
	53	* each other.
	54	*/
5cf00fcd N	55	int chunk_shift; /* shift from chunks to sectors */
5cf00fcd N	56	sector_t chunk_mask;
f8c9e74f	57	} prev, geo;
5cf00fcd N	58	int copies; /* near_copies * far_copies.
	59	* must be <= raid_disks
	60	*/
1da177e4	61
69335ef3 N	62	sector_t dev_sectors; /* temp copy of
69335ef3 N	63	* mddev->dev_sectors */
f8c9e74f	64	sector_t reshape_progress;
3ea7daa5 N	65	sector_t reshape_safe;
	66	unsigned long reshape_checkpoint;
	67	sector_t offset_diff;
dab8b292	68
1da177e4	69	struct list_head retry_list;
95af587e N	70	/* A separate list of r1bio which just need raid_end_bio_io called.
	71	* This mustn't happen for writes which had any errors if the superblock
	72	* needs to be written.
	73	*/
	74	struct list_head bio_end_io_list;
	75
6cce3b23 N	76	/* queue pending writes and submit them on unplug */
6cce3b23 N	77	struct bio_list pending_bio_list;
1da177e4	78
b9b083f9	79	seqlock_t resync_lock;
0e5313e2	80	atomic_t nr_pending;
69335ef3 N	81	int nr_waiting;
	82	int nr_queued;
	83	int barrier;
0e5313e2	84	int array_freeze_pending;
1da177e4	85	sector_t next_resync;
6cce3b23 N	86	int fullsync; /* set to 1 if a full sync is needed,
	87	* (fresh device added).
	88	* Cleared when a sync completes.
	89	*/
69335ef3 N	90	int have_replacement; /* There is at least one
	91	* replacement device.
	92	*/
0a27ec96	93	wait_queue_head_t wait_barrier;
1da177e4	94
afeee514 KO	95	mempool_t r10bio_pool;
afeee514 KO	96	mempool_t r10buf_pool;
4443ae10	97	struct page *tmppage;
afeee514	98	struct bio_set bio_split;
dab8b292 TM	99
	100	/* When taking over an array from a different personality, we store
	101	* the new thread here until we fully activate the array.
	102	*/
44693154	103	struct md_thread __rcu *thread;
8db87912 GJ	104
	105	/*
	106	* Keep track of cluster resync window to send to other nodes.
	107	*/
	108	sector_t cluster_sync_low;
	109	sector_t cluster_sync_high;
1da177e4 LT	110	};
1da177e4 LT	111
1da177e4 LT	112	/*
	113	* this is our 'private' RAID10 bio.
	114	*
	115	* it contains information about what kind of IO operations were started
	116	* for this RAID10 operation, and about their status:
	117	*/
	118
9f2c9d12	119	struct r10bio {
1da177e4 LT	120	atomic_t remaining; /* 'have we finished' count,
	121	* used from IRQ handlers
	122	*/
	123	sector_t sector; /* virtual sector number */
	124	int sectors;
	125	unsigned long state;
fd01b88c	126	struct mddev *mddev;
1da177e4 LT	127	/*
	128	* original bio going to /dev/mdx
	129	*/
	130	struct bio *master_bio;
	131	/*
	132	* if the IO is in READ direction, then this is where we read
	133	*/
	134	int read_slot;
	135
	136	struct list_head retry_list;
	137	/*
	138	* if the IO is in WRITE direction, then multiple bios are used,
	139	* one for each copy.
	140	* When resyncing we also use one for each copy.
	141	* When reconstructing, we use 2 bios, one for read, one for write.
	142	* We choose the number when they are allocated.
69335ef3	143	* We sometimes need an extra bio to write to the replacement.
1da177e4	144	*/
e0ee7785	145	struct r10dev {
69335ef3 N	146	struct bio *bio;
	147	union {
	148	struct bio *repl_bio; /* used for resync and
	149	* writes */
	150	struct md_rdev *rdev; /* used for reads
	151	* (read_slot >= 0) */
	152	};
	153	sector_t addr;
	154	int devnum;
358369f0	155	} devs[];
1da177e4 LT	156	};
	157
	158	/* bits for r10bio.state */
69335ef3 N	159	enum r10bio_state {
	160	R10BIO_Uptodate,
	161	R10BIO_IsSync,
	162	R10BIO_IsRecover,
3ea7daa5	163	R10BIO_IsReshape,
69335ef3	164	R10BIO_Degraded,
856e08e2 N	165	/* Set ReadError on bios that experience a read error
	166	* so that raid10d knows what to do with them.
	167	*/
69335ef3	168	R10BIO_ReadError,
749c55e9 N	169	/* If a write for this request means we can clear some
	170	* known-bad-block records, we set this flag.
	171	*/
69335ef3 N	172	R10BIO_MadeGood,
69335ef3 N	173	R10BIO_WriteError,
f8c9e74f N	174	/* During a reshape we might be performing IO on the
	175	* 'previous' part of the array, in which case this
	176	* flag is set
	177	*/
	178	R10BIO_Previous,
8d3ca83d N	179	/* failfast devices did receive failfast requests. */
8d3ca83d N	180	R10BIO_FailFast,
254c271d	181	R10BIO_Discard,
69335ef3	182	};
1da177e4	183	#endif