Commit | Line | Data |
---|---|---|
127186cf YK |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* | |
3 | * linear.c : Multiple Devices driver for Linux Copyright (C) 1994-96 Marc | |
4 | * ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr> | |
5 | */ | |
6 | ||
7 | #include <linux/blkdev.h> | |
127186cf YK |
8 | #include <linux/seq_file.h> |
9 | #include <linux/module.h> | |
10 | #include <linux/slab.h> | |
11 | #include <trace/events/block.h> | |
12 | #include "md.h" | |
13 | ||
/*
 * Per-member-device bookkeeping: the rdev itself plus the exclusive
 * upper bound (in array sectors) of the region this device backs.
 */
struct dev_info {
	struct md_rdev *rdev;
	sector_t end_sector;	/* first array sector past this device */
};
18 | ||
struct linear_conf {
	struct rcu_head rcu;		/* for kfree_rcu() when replaced in linear_add() */
	sector_t array_sectors;		/* total usable size of the array */
	/* a copy of mddev->raid_disks, always matching the disks[] length */
	int raid_disks;
	struct dev_info disks[] __counted_by(raid_disks);
};
26 | ||
27 | /* | |
28 | * find which device holds a particular offset | |
29 | */ | |
30 | static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) | |
31 | { | |
32 | int lo, mid, hi; | |
33 | struct linear_conf *conf; | |
34 | ||
35 | lo = 0; | |
36 | hi = mddev->raid_disks - 1; | |
37 | conf = mddev->private; | |
38 | ||
39 | /* | |
40 | * Binary Search | |
41 | */ | |
42 | ||
43 | while (hi > lo) { | |
44 | ||
45 | mid = (hi + lo) / 2; | |
46 | if (sector < conf->disks[mid].end_sector) | |
47 | hi = mid; | |
48 | else | |
49 | lo = mid + 1; | |
50 | } | |
51 | ||
52 | return conf->disks + lo; | |
53 | } | |
54 | ||
55 | static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) | |
56 | { | |
57 | struct linear_conf *conf; | |
58 | sector_t array_sectors; | |
59 | ||
60 | conf = mddev->private; | |
61 | WARN_ONCE(sectors || raid_disks, | |
62 | "%s does not support generic reshape\n", __func__); | |
63 | array_sectors = conf->array_sectors; | |
64 | ||
65 | return array_sectors; | |
66 | } | |
67 | ||
68 | static int linear_set_limits(struct mddev *mddev) | |
69 | { | |
70 | struct queue_limits lim; | |
71 | int err; | |
72 | ||
73 | md_init_stacking_limits(&lim); | |
74 | lim.max_hw_sectors = mddev->chunk_sectors; | |
75 | lim.max_write_zeroes_sectors = mddev->chunk_sectors; | |
76 | lim.io_min = mddev->chunk_sectors << 9; | |
77 | err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); | |
a572593a | 78 | if (err) |
127186cf | 79 | return err; |
127186cf YK |
80 | |
81 | return queue_limits_set(mddev->gendisk->queue, &lim); | |
82 | } | |
83 | ||
/*
 * Build a linear_conf describing @raid_disks member devices of @mddev:
 * validate each rdev's slot, round device sizes down to a whole chunk
 * (if chunking is enabled), accumulate the array size, and compute the
 * cumulative end_sector of every member for the which_dev() search.
 *
 * Returns the new conf, or an ERR_PTR() on failure.  The caller owns
 * the allocation (released with kfree() or kfree_rcu()).
 */
static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
{
	struct linear_conf *conf;
	struct md_rdev *rdev;
	int ret = -EINVAL;
	int cnt;
	int i;

	conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
	if (!conf)
		return ERR_PTR(-ENOMEM);

	/*
	 * conf->raid_disks is a copy of mddev->raid_disks.  The copy is
	 * kept because mddev->raid_disks may not be consistent with the
	 * length of conf->disks[] while linear_add() updates it and an
	 * old conf is still being read under RCU.  conf->raid_disks is
	 * always consistent with the disks[] array it was allocated
	 * with, and mddev->private is published with
	 * rcu_assign_pointer() in linear_add(), so that race is
	 * avoided.
	 */
	conf->raid_disks = raid_disks;

	cnt = 0;
	conf->array_sectors = 0;

	rdev_for_each(rdev, mddev) {
		int j = rdev->raid_disk;
		struct dev_info *disk = conf->disks + j;
		sector_t sectors;

		/* Each rdev must claim a unique slot in [0, raid_disks). */
		if (j < 0 || j >= raid_disks || disk->rdev) {
			pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
				mdname(mddev));
			goto out;
		}

		disk->rdev = rdev;
		if (mddev->chunk_sectors) {
			/* Round the usable size down to a whole chunk. */
			sectors = rdev->sectors;
			sector_div(sectors, mddev->chunk_sectors);
			rdev->sectors = sectors * mddev->chunk_sectors;
		}

		conf->array_sectors += rdev->sectors;
		cnt++;
	}
	if (cnt != raid_disks) {
		pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
			mdname(mddev));
		goto out;
	}

	/*
	 * Here we calculate the device offsets: disks[i].end_sector is
	 * the first array sector past device i.
	 */
	conf->disks[0].end_sector = conf->disks[0].rdev->sectors;

	for (i = 1; i < raid_disks; i++)
		conf->disks[i].end_sector =
			conf->disks[i-1].end_sector +
			conf->disks[i].rdev->sectors;

	/* dm-raid manages queue limits itself, so skip them there. */
	if (!mddev_is_dm(mddev)) {
		ret = linear_set_limits(mddev);
		if (ret)
			goto out;
	}

	return conf;

out:
	kfree(conf);
	return ERR_PTR(ret);
}
161 | ||
162 | static int linear_run(struct mddev *mddev) | |
163 | { | |
164 | struct linear_conf *conf; | |
165 | int ret; | |
166 | ||
167 | if (md_check_no_bitmap(mddev)) | |
168 | return -EINVAL; | |
169 | ||
170 | conf = linear_conf(mddev, mddev->raid_disks); | |
171 | if (IS_ERR(conf)) | |
172 | return PTR_ERR(conf); | |
173 | ||
174 | mddev->private = conf; | |
175 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); | |
176 | ||
177 | ret = md_integrity_register(mddev); | |
178 | if (ret) { | |
179 | kfree(conf); | |
180 | mddev->private = NULL; | |
181 | } | |
182 | return ret; | |
183 | } | |
184 | ||
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
{
	/* Adding a drive to a linear array allows the array to grow.
	 * It is permitted if the new drive has a matching superblock
	 * already on it, with raid_disk equal to raid_disks.
	 * It is achieved by creating a new linear_conf structure
	 * and swapping it in in-place of the current one.
	 * The current one is never freed until the array is stopped
	 * (or an RCU grace period passes).  This avoids races.
	 */
	struct linear_conf *newconf, *oldconf;

	/* The new drive must take exactly the next slot. */
	if (rdev->saved_raid_disk != mddev->raid_disks)
		return -EINVAL;

	rdev->raid_disk = rdev->saved_raid_disk;
	rdev->saved_raid_disk = -1;

	/* Build a conf covering all existing drives plus the new one. */
	newconf = linear_conf(mddev, mddev->raid_disks + 1);
	if (IS_ERR(newconf))
		return PTR_ERR(newconf);

	/* newconf->raid_disks already keeps a copy of the increased
	 * value of mddev->raid_disks, WARN_ONCE() is just used to make
	 * sure of this.  It is possible that oldconf is still being
	 * read under RCU elsewhere, therefore kfree_rcu() is used so
	 * oldconf is not freed until no one uses it anymore.
	 */
	oldconf = rcu_dereference_protected(mddev->private,
			lockdep_is_held(&mddev->reconfig_mutex));
	mddev->raid_disks++;
	WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
		"copied raid_disks doesn't match mddev->raid_disks");
	rcu_assign_pointer(mddev->private, newconf);
	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
	set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
	kfree_rcu(oldconf, rcu);
	return 0;
}
224 | ||
/*
 * Release the private conf.  The member rdevs belong to md core and
 * are torn down separately; the conf is a single allocation.
 */
static void linear_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
231 | ||
/*
 * Route a bio to the member device that backs its sector range,
 * splitting it first if it crosses a device boundary.  Always returns
 * true: the bio is either submitted or completed with an error here.
 */
static bool linear_make_request(struct mddev *mddev, struct bio *bio)
{
	struct dev_info *tmp_dev;
	sector_t start_sector, end_sector, data_offset;
	sector_t bio_sector = bio->bi_iter.bi_sector;

	/* Flushes may be consumed entirely by md core. */
	if (unlikely(bio->bi_opf & REQ_PREFLUSH)
	    && md_flush_request(mddev, bio))
		return true;

	tmp_dev = which_dev(mddev, bio_sector);
	start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
	end_sector = tmp_dev->end_sector;
	data_offset = tmp_dev->rdev->data_offset;

	if (unlikely(bio_sector >= end_sector ||
		     bio_sector < start_sector))
		goto out_of_bounds;

	if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
		md_error(mddev, tmp_dev->rdev);
		bio_io_error(bio);
		return true;
	}

	if (unlikely(bio_end_sector(bio) > end_sector)) {
		/* This bio crosses a device boundary, so we have to split it */
		struct bio *split = bio_split(bio, end_sector - bio_sector,
					      GFP_NOIO, &mddev->bio_set);

		if (IS_ERR(split)) {
			bio->bi_status = errno_to_blk_status(PTR_ERR(split));
			bio_endio(bio);
			return true;
		}

		/* Submit the tail; continue below with the front part. */
		bio_chain(split, bio);
		submit_bio_noacct(bio);
		bio = split;
	}

	md_account_bio(mddev, &bio);
	/* Remap the array sector onto the member device's data area. */
	bio_set_dev(bio, tmp_dev->rdev->bdev);
	bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
		start_sector + data_offset;

	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
		     !bdev_max_discard_sectors(bio->bi_bdev))) {
		/* Just ignore it */
		bio_endio(bio);
	} else {
		if (mddev->gendisk)
			trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
					      bio_sector);
		mddev_check_write_zeroes(mddev, bio);
		submit_bio_noacct(bio);
	}
	return true;

out_of_bounds:
	pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
	       mdname(mddev),
	       (unsigned long long)bio->bi_iter.bi_sector,
	       tmp_dev->rdev->bdev,
	       (unsigned long long)tmp_dev->rdev->sectors,
	       (unsigned long long)start_sector);
	bio_io_error(bio);
	return true;
}
301 | ||
/* /proc/mdstat line: chunk_sectors is in 512 B sectors, so /2 gives KiB. */
static void linear_status(struct seq_file *seq, struct mddev *mddev)
{
	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
306 | ||
307 | static void linear_error(struct mddev *mddev, struct md_rdev *rdev) | |
308 | { | |
309 | if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { | |
310 | char *md_name = mdname(mddev); | |
311 | ||
312 | pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n", | |
313 | md_name, rdev->bdev); | |
314 | } | |
315 | } | |
316 | ||
/* Deliberate no-op: linear keeps no in-flight private state to drain. */
static void linear_quiesce(struct mddev *mddev, int state)
{
}
320 | ||
/* md personality ops for the (deprecated) linear concatenation level. */
static struct md_personality linear_personality = {
	.head = {
		.type	= MD_PERSONALITY,
		.id	= ID_LINEAR,
		.name	= "linear",
		.owner	= THIS_MODULE,
	},

	.make_request	= linear_make_request,
	.run		= linear_run,
	.free		= linear_free,
	.status		= linear_status,
	.hot_add_disk	= linear_add,
	.size		= linear_size,
	.quiesce	= linear_quiesce,
	.error_handler	= linear_error,
};
338 | ||
/* Register the linear personality with md core on module load. */
static int __init linear_init(void)
{
	return register_md_submodule(&linear_personality.head);
}
343 | ||
/* Unregister the personality on module unload. */
static void linear_exit(void)
{
	unregister_md_submodule(&linear_personality.head);
}
348 | ||
module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
/* Aliases so userspace/md core can auto-load this module by level name. */
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
MODULE_ALIAS("md-linear");
MODULE_ALIAS("md-level--1");