Commit | Line | Data |
---|---|---|
3bd94003 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
26b9f228 HM |
2 | /* |
3 | * Copyright (C) 2005-2007 Red Hat GmbH | |
4 | * | |
5 | * A target that delays reads and/or writes and can send | |
6 | * them to different devices. | |
7 | * | |
8 | * This file is released under the GPL. | |
9 | */ | |
10 | ||
11 | #include <linux/module.h> | |
12 | #include <linux/init.h> | |
13 | #include <linux/blkdev.h> | |
14 | #include <linux/bio.h> | |
15 | #include <linux/slab.h> | |
70bbeb29 | 16 | #include <linux/kthread.h> |
26b9f228 | 17 | |
586e80e6 MP |
18 | #include <linux/device-mapper.h> |
19 | ||
26b9f228 HM |
20 | #define DM_MSG_PREFIX "delay" |
21 | ||
3876ac76 MP |
22 | struct delay_class { |
23 | struct dm_dev *dev; | |
24 | sector_t start; | |
86a3238c HM |
25 | unsigned int delay; |
26 | unsigned int ops; | |
3876ac76 MP |
27 | }; |
28 | ||
26b9f228 HM |
29 | struct delay_c { |
30 | struct timer_list delay_timer; | |
ac818646 | 31 | struct mutex timer_lock; |
718822c1 | 32 | struct workqueue_struct *kdelayd_wq; |
26b9f228 HM |
33 | struct work_struct flush_expired_bios; |
34 | struct list_head delayed_bios; | |
70bbeb29 | 35 | struct task_struct *worker; |
6fc45b6e | 36 | bool may_delay; |
26b9f228 | 37 | |
3876ac76 MP |
38 | struct delay_class read; |
39 | struct delay_class write; | |
cda6b5ab | 40 | struct delay_class flush; |
26b9f228 | 41 | |
3876ac76 | 42 | int argc; |
26b9f228 HM |
43 | }; |
44 | ||
028867ac | 45 | struct dm_delay_info { |
26b9f228 | 46 | struct delay_c *context; |
3876ac76 | 47 | struct delay_class *class; |
26b9f228 | 48 | struct list_head list; |
26b9f228 HM |
49 | unsigned long expires; |
50 | }; | |
51 | ||
/*
 * Protects every target's delayed_bios list, the per-class ops counters and
 * the may_delay flag checked in delay_bio().
 */
static DEFINE_MUTEX(delayed_bios_lock);
53 | ||
8376d3c1 | 54 | static void handle_delayed_timer(struct timer_list *t) |
26b9f228 | 55 | { |
8376d3c1 | 56 | struct delay_c *dc = from_timer(dc, t, delay_timer); |
26b9f228 | 57 | |
718822c1 | 58 | queue_work(dc->kdelayd_wq, &dc->flush_expired_bios); |
26b9f228 HM |
59 | } |
60 | ||
61 | static void queue_timeout(struct delay_c *dc, unsigned long expires) | |
62 | { | |
ac818646 | 63 | mutex_lock(&dc->timer_lock); |
26b9f228 HM |
64 | |
65 | if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) | |
66 | mod_timer(&dc->delay_timer, expires); | |
67 | ||
ac818646 | 68 | mutex_unlock(&dc->timer_lock); |
26b9f228 HM |
69 | } |
70 | ||
70bbeb29 CL |
71 | static inline bool delay_is_fast(struct delay_c *dc) |
72 | { | |
73 | return !!dc->worker; | |
74 | } | |
75 | ||
38cfff56 MP |
76 | static void flush_bios(struct bio *bio) |
77 | { | |
78 | struct bio *n; | |
79 | ||
80 | while (bio) { | |
81 | n = bio->bi_next; | |
82 | bio->bi_next = NULL; | |
83 | dm_submit_bio_remap(bio, NULL); | |
84 | bio = n; | |
85 | } | |
86 | } | |
87 | ||
ccadc8a2 | 88 | static void flush_delayed_bios(struct delay_c *dc, bool flush_all) |
70bbeb29 CL |
89 | { |
90 | struct dm_delay_info *delayed, *next; | |
38cfff56 | 91 | struct bio_list flush_bio_list; |
ccadc8a2 MP |
92 | unsigned long next_expires = 0; |
93 | bool start_timer = false; | |
38cfff56 | 94 | bio_list_init(&flush_bio_list); |
70bbeb29 CL |
95 | |
96 | mutex_lock(&delayed_bios_lock); | |
97 | list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { | |
ccadc8a2 | 98 | cond_resched(); |
70bbeb29 CL |
99 | if (flush_all || time_after_eq(jiffies, delayed->expires)) { |
100 | struct bio *bio = dm_bio_from_per_bio_data(delayed, | |
101 | sizeof(struct dm_delay_info)); | |
102 | list_del(&delayed->list); | |
38cfff56 | 103 | bio_list_add(&flush_bio_list, bio); |
70bbeb29 | 104 | delayed->class->ops--; |
ccadc8a2 MP |
105 | continue; |
106 | } | |
107 | ||
108 | if (!delay_is_fast(dc)) { | |
109 | if (!start_timer) { | |
110 | start_timer = true; | |
111 | next_expires = delayed->expires; | |
112 | } else { | |
113 | next_expires = min(next_expires, delayed->expires); | |
114 | } | |
70bbeb29 CL |
115 | } |
116 | } | |
117 | mutex_unlock(&delayed_bios_lock); | |
38cfff56 | 118 | |
ccadc8a2 MP |
119 | if (start_timer) |
120 | queue_timeout(dc, next_expires); | |
121 | ||
38cfff56 | 122 | flush_bios(bio_list_get(&flush_bio_list)); |
70bbeb29 CL |
123 | } |
124 | ||
125 | static int flush_worker_fn(void *data) | |
126 | { | |
127 | struct delay_c *dc = data; | |
128 | ||
38cfff56 | 129 | while (!kthread_should_stop()) { |
ccadc8a2 | 130 | flush_delayed_bios(dc, false); |
38cfff56 | 131 | mutex_lock(&delayed_bios_lock); |
70bbeb29 CL |
132 | if (unlikely(list_empty(&dc->delayed_bios))) { |
133 | set_current_state(TASK_INTERRUPTIBLE); | |
38cfff56 | 134 | mutex_unlock(&delayed_bios_lock); |
70bbeb29 | 135 | schedule(); |
38cfff56 MP |
136 | } else { |
137 | mutex_unlock(&delayed_bios_lock); | |
70bbeb29 | 138 | cond_resched(); |
38cfff56 | 139 | } |
70bbeb29 CL |
140 | } |
141 | ||
142 | return 0; | |
143 | } | |
144 | ||
26b9f228 HM |
145 | static void flush_expired_bios(struct work_struct *work) |
146 | { | |
147 | struct delay_c *dc; | |
148 | ||
149 | dc = container_of(work, struct delay_c, flush_expired_bios); | |
ccadc8a2 | 150 | flush_delayed_bios(dc, false); |
26b9f228 HM |
151 | } |
152 | ||
3876ac76 MP |
153 | static void delay_dtr(struct dm_target *ti) |
154 | { | |
155 | struct delay_c *dc = ti->private; | |
156 | ||
81bc6d15 MP |
157 | if (dc->kdelayd_wq) |
158 | destroy_workqueue(dc->kdelayd_wq); | |
3876ac76 MP |
159 | |
160 | if (dc->read.dev) | |
161 | dm_put_device(ti, dc->read.dev); | |
162 | if (dc->write.dev) | |
163 | dm_put_device(ti, dc->write.dev); | |
cda6b5ab MP |
164 | if (dc->flush.dev) |
165 | dm_put_device(ti, dc->flush.dev); | |
70bbeb29 CL |
166 | if (dc->worker) |
167 | kthread_stop(dc->worker); | |
3876ac76 | 168 | |
38cfff56 | 169 | mutex_destroy(&dc->timer_lock); |
3876ac76 MP |
170 | |
171 | kfree(dc); | |
172 | } | |
173 | ||
174 | static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv) | |
175 | { | |
176 | int ret; | |
177 | unsigned long long tmpll; | |
178 | char dummy; | |
179 | ||
ef87bfc2 | 180 | if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) { |
3876ac76 MP |
181 | ti->error = "Invalid device sector"; |
182 | return -EINVAL; | |
183 | } | |
184 | c->start = tmpll; | |
185 | ||
186 | if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) { | |
187 | ti->error = "Invalid delay"; | |
188 | return -EINVAL; | |
189 | } | |
190 | ||
191 | ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev); | |
192 | if (ret) { | |
193 | ti->error = "Device lookup failed"; | |
194 | return ret; | |
195 | } | |
196 | ||
197 | return 0; | |
198 | } | |
199 | ||
26b9f228 HM |
/*
 * Mapping parameters:
 *    <device> <offset> <delay> [<write_device> <write_offset> <write_delay>
 *    [<flush_device> <flush_offset> <flush_delay>]]
 *
 * With separate write parameters, the first set is only used for reads.
 * With separate flush parameters, the third set is used for flush requests;
 * otherwise flushes use the same parameters as writes.
 * Offsets are specified in sectors.
 * Delays are specified in milliseconds.
 */
208 | static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |
209 | { | |
210 | struct delay_c *dc; | |
e80d1c80 | 211 | int ret; |
70bbeb29 | 212 | unsigned int max_delay; |
26b9f228 | 213 | |
cda6b5ab MP |
214 | if (argc != 3 && argc != 6 && argc != 9) { |
215 | ti->error = "Requires exactly 3, 6 or 9 arguments"; | |
26b9f228 HM |
216 | return -EINVAL; |
217 | } | |
218 | ||
3876ac76 | 219 | dc = kzalloc(sizeof(*dc), GFP_KERNEL); |
26b9f228 HM |
220 | if (!dc) { |
221 | ti->error = "Cannot allocate context"; | |
222 | return -ENOMEM; | |
223 | } | |
224 | ||
3876ac76 | 225 | ti->private = dc; |
3876ac76 | 226 | INIT_LIST_HEAD(&dc->delayed_bios); |
38cfff56 | 227 | mutex_init(&dc->timer_lock); |
6fc45b6e | 228 | dc->may_delay = true; |
3876ac76 | 229 | dc->argc = argc; |
26b9f228 | 230 | |
3876ac76 MP |
231 | ret = delay_class_ctr(ti, &dc->read, argv); |
232 | if (ret) | |
26b9f228 | 233 | goto bad; |
70bbeb29 | 234 | max_delay = dc->read.delay; |
26b9f228 | 235 | |
3876ac76 MP |
236 | if (argc == 3) { |
237 | ret = delay_class_ctr(ti, &dc->write, argv); | |
cda6b5ab MP |
238 | if (ret) |
239 | goto bad; | |
240 | ret = delay_class_ctr(ti, &dc->flush, argv); | |
3876ac76 MP |
241 | if (ret) |
242 | goto bad; | |
70bbeb29 CL |
243 | max_delay = max(max_delay, dc->write.delay); |
244 | max_delay = max(max_delay, dc->flush.delay); | |
26b9f228 | 245 | goto out; |
26b9f228 | 246 | } |
26b9f228 | 247 | |
3876ac76 MP |
248 | ret = delay_class_ctr(ti, &dc->write, argv + 3); |
249 | if (ret) | |
250 | goto bad; | |
cda6b5ab MP |
251 | if (argc == 6) { |
252 | ret = delay_class_ctr(ti, &dc->flush, argv + 3); | |
253 | if (ret) | |
254 | goto bad; | |
70bbeb29 | 255 | max_delay = max(max_delay, dc->flush.delay); |
cda6b5ab MP |
256 | goto out; |
257 | } | |
258 | ||
259 | ret = delay_class_ctr(ti, &dc->flush, argv + 6); | |
260 | if (ret) | |
261 | goto bad; | |
70bbeb29 | 262 | max_delay = max(max_delay, dc->flush.delay); |
26b9f228 HM |
263 | |
264 | out: | |
70bbeb29 CL |
265 | if (max_delay < 50) { |
266 | /* | |
267 | * In case of small requested delays, use kthread instead of | |
268 | * timers and workqueue to achieve better latency. | |
269 | */ | |
270 | dc->worker = kthread_create(&flush_worker_fn, dc, | |
271 | "dm-delay-flush-worker"); | |
272 | if (IS_ERR(dc->worker)) { | |
273 | ret = PTR_ERR(dc->worker); | |
38cfff56 | 274 | dc->worker = NULL; |
70bbeb29 CL |
275 | goto bad; |
276 | } | |
277 | } else { | |
278 | timer_setup(&dc->delay_timer, handle_delayed_timer, 0); | |
279 | INIT_WORK(&dc->flush_expired_bios, flush_expired_bios); | |
70bbeb29 CL |
280 | dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0); |
281 | if (!dc->kdelayd_wq) { | |
282 | ret = -EINVAL; | |
283 | DMERR("Couldn't start kdelayd"); | |
284 | goto bad; | |
285 | } | |
718822c1 MP |
286 | } |
287 | ||
55a62eef AK |
288 | ti->num_flush_bios = 1; |
289 | ti->num_discard_bios = 1; | |
c3573421 | 290 | ti->accounts_remapped_io = true; |
30187e1d | 291 | ti->per_io_data_size = sizeof(struct dm_delay_info); |
26b9f228 HM |
292 | return 0; |
293 | ||
294 | bad: | |
3876ac76 | 295 | delay_dtr(ti); |
e80d1c80 | 296 | return ret; |
26b9f228 HM |
297 | } |
298 | ||
3876ac76 | 299 | static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) |
26b9f228 | 300 | { |
028867ac | 301 | struct dm_delay_info *delayed; |
26b9f228 HM |
302 | unsigned long expires = 0; |
303 | ||
6fc45b6e | 304 | if (!c->delay) |
340c9ec0 | 305 | return DM_MAPIO_REMAPPED; |
26b9f228 | 306 | |
42065460 | 307 | delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); |
26b9f228 HM |
308 | |
309 | delayed->context = dc; | |
3876ac76 | 310 | delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); |
26b9f228 HM |
311 | |
312 | mutex_lock(&delayed_bios_lock); | |
6fc45b6e MP |
313 | if (unlikely(!dc->may_delay)) { |
314 | mutex_unlock(&delayed_bios_lock); | |
315 | return DM_MAPIO_REMAPPED; | |
316 | } | |
3876ac76 | 317 | c->ops++; |
26b9f228 | 318 | list_add_tail(&delayed->list, &dc->delayed_bios); |
26b9f228 HM |
319 | mutex_unlock(&delayed_bios_lock); |
320 | ||
70bbeb29 CL |
321 | if (delay_is_fast(dc)) |
322 | wake_up_process(dc->worker); | |
323 | else | |
324 | queue_timeout(dc, expires); | |
26b9f228 | 325 | |
340c9ec0 | 326 | return DM_MAPIO_SUBMITTED; |
26b9f228 HM |
327 | } |
328 | ||
329 | static void delay_presuspend(struct dm_target *ti) | |
330 | { | |
331 | struct delay_c *dc = ti->private; | |
332 | ||
6fc45b6e MP |
333 | mutex_lock(&delayed_bios_lock); |
334 | dc->may_delay = false; | |
335 | mutex_unlock(&delayed_bios_lock); | |
70bbeb29 | 336 | |
ccadc8a2 | 337 | if (!delay_is_fast(dc)) |
70bbeb29 | 338 | del_timer_sync(&dc->delay_timer); |
ccadc8a2 | 339 | flush_delayed_bios(dc, true); |
26b9f228 HM |
340 | } |
341 | ||
342 | static void delay_resume(struct dm_target *ti) | |
343 | { | |
344 | struct delay_c *dc = ti->private; | |
345 | ||
6fc45b6e | 346 | dc->may_delay = true; |
26b9f228 HM |
347 | } |
348 | ||
7de3ee57 | 349 | static int delay_map(struct dm_target *ti, struct bio *bio) |
26b9f228 HM |
350 | { |
351 | struct delay_c *dc = ti->private; | |
3876ac76 MP |
352 | struct delay_class *c; |
353 | struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info)); | |
26b9f228 | 354 | |
3876ac76 | 355 | if (bio_data_dir(bio) == WRITE) { |
cda6b5ab MP |
356 | if (unlikely(bio->bi_opf & REQ_PREFLUSH)) |
357 | c = &dc->flush; | |
358 | else | |
359 | c = &dc->write; | |
3876ac76 MP |
360 | } else { |
361 | c = &dc->read; | |
26b9f228 | 362 | } |
3876ac76 MP |
363 | delayed->class = c; |
364 | bio_set_dev(bio, c->dev->bdev); | |
e86f2b00 | 365 | bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector); |
26b9f228 | 366 | |
3876ac76 | 367 | return delay_bio(dc, c, bio); |
26b9f228 HM |
368 | } |
369 | ||
3876ac76 MP |
/* Emit one "<device> <offset> <delay>" triple for delay class @c. */
#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)
372 | ||
fd7c092e | 373 | static void delay_status(struct dm_target *ti, status_type_t type, |
86a3238c | 374 | unsigned int status_flags, char *result, unsigned int maxlen) |
26b9f228 HM |
375 | { |
376 | struct delay_c *dc = ti->private; | |
377 | int sz = 0; | |
378 | ||
379 | switch (type) { | |
380 | case STATUSTYPE_INFO: | |
cda6b5ab | 381 | DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops); |
26b9f228 HM |
382 | break; |
383 | ||
384 | case STATUSTYPE_TABLE: | |
3876ac76 MP |
385 | DMEMIT_DELAY_CLASS(&dc->read); |
386 | if (dc->argc >= 6) { | |
387 | DMEMIT(" "); | |
388 | DMEMIT_DELAY_CLASS(&dc->write); | |
389 | } | |
cda6b5ab MP |
390 | if (dc->argc >= 9) { |
391 | DMEMIT(" "); | |
392 | DMEMIT_DELAY_CLASS(&dc->flush); | |
393 | } | |
26b9f228 | 394 | break; |
8ec45662 TS |
395 | |
396 | case STATUSTYPE_IMA: | |
397 | *result = '\0'; | |
398 | break; | |
26b9f228 | 399 | } |
26b9f228 HM |
400 | } |
401 | ||
af4874e0 MS |
402 | static int delay_iterate_devices(struct dm_target *ti, |
403 | iterate_devices_callout_fn fn, void *data) | |
404 | { | |
405 | struct delay_c *dc = ti->private; | |
406 | int ret = 0; | |
407 | ||
3876ac76 MP |
408 | ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data); |
409 | if (ret) | |
410 | goto out; | |
411 | ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data); | |
af4874e0 MS |
412 | if (ret) |
413 | goto out; | |
cda6b5ab MP |
414 | ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data); |
415 | if (ret) | |
416 | goto out; | |
af4874e0 MS |
417 | |
418 | out: | |
419 | return ret; | |
420 | } | |
421 | ||
26b9f228 HM |
422 | static struct target_type delay_target = { |
423 | .name = "delay", | |
70bbeb29 | 424 | .version = {1, 4, 0}, |
e2460f2a | 425 | .features = DM_TARGET_PASSES_INTEGRITY, |
26b9f228 HM |
426 | .module = THIS_MODULE, |
427 | .ctr = delay_ctr, | |
428 | .dtr = delay_dtr, | |
429 | .map = delay_map, | |
430 | .presuspend = delay_presuspend, | |
431 | .resume = delay_resume, | |
432 | .status = delay_status, | |
af4874e0 | 433 | .iterate_devices = delay_iterate_devices, |
26b9f228 | 434 | }; |
3664ff82 | 435 | module_dm(delay); |
26b9f228 HM |
436 | |
437 | MODULE_DESCRIPTION(DM_NAME " delay target"); | |
438 | MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>"); | |
439 | MODULE_LICENSE("GPL"); |