// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/bitmap.h>

#include "dm-core.h"

#define DM_MSG_PREFIX "zone"

#define DM_ZONE_INVALID_WP_OFST	UINT_MAX

/*
 * For internal zone reports bypassing the top BIO submission path.
 */
static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
				  sector_t sector, unsigned int nr_zones,
				  report_zones_cb cb, void *data)
{
	struct gendisk *disk = md->disk;
	int ret;
	struct dm_report_zones_args args = {
		.next_sector = sector,
		.orig_data = data,
		.orig_cb = cb,
	};

	do {
		struct dm_target *tgt;

		tgt = dm_table_find_target(t, args.next_sector);
		if (WARN_ON_ONCE(!tgt->type->report_zones))
			return -EIO;

		args.tgt = tgt;
		ret = tgt->type->report_zones(tgt, &args,
					      nr_zones - args.zone_idx);
		if (ret < 0)
			return ret;
	} while (args.zone_idx < nr_zones &&
		 args.next_sector < get_capacity(disk));

	return args.zone_idx;
}

/*
 * User facing dm device block device report zone operation. This calls the
 * report_zones operation for each target of a device table. This operation is
 * generally implemented by targets using dm_report_zones().
 */
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct mapped_device *md = disk->private_data;
	struct dm_table *map;
	int srcu_idx, ret;

	if (dm_suspended_md(md))
		return -EAGAIN;

	map = dm_get_live_table(md, &srcu_idx);
	if (!map)
		return -EIO;

	ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);

	dm_put_live_table(md, srcu_idx);

	return ret;
}

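/*
 * Usage note (illustration only, assuming dm.c wires this handler into the
 * mapped device's block_device_operations): a zone report issued against the
 * dm block device eventually lands here, e.g. roughly:
 *
 *	// bdev is the dm device, e.g. the block device behind /dev/dm-0
 *	ret = blkdev_report_zones(bdev, 0, nr_zones, cb, data);
 *
 * which calls disk->fops->report_zones, i.e. dm_blk_report_zones().
 */
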
static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	struct dm_report_zones_args *args = data;
	sector_t sector_diff = args->tgt->begin - args->start;

	/*
	 * Ignore zones beyond the target range.
	 */
	if (zone->start >= args->start + args->tgt->len)
		return 0;

	/*
	 * Remap the start sector and write pointer position of the zone
	 * to match its position in the target range.
	 */
	zone->start += sector_diff;
	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
		if (zone->cond == BLK_ZONE_COND_FULL)
			zone->wp = zone->start + zone->len;
		else if (zone->cond == BLK_ZONE_COND_EMPTY)
			zone->wp = zone->start;
		else
			zone->wp += sector_diff;
	}

	args->next_sector = zone->start + zone->len;
	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
}

/*
 * Helper for drivers of zoned targets to implement struct target_type
 * report_zones operation.
 */
int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
		    struct dm_report_zones_args *args, unsigned int nr_zones)
{
	/*
	 * Set the target mapping start sector first so that
	 * dm_report_zones_cb() can correctly remap zone information.
	 */
	args->start = start;

	return blkdev_report_zones(bdev, sector, nr_zones,
				   dm_report_zones_cb, args);
}
EXPORT_SYMBOL_GPL(dm_report_zones);

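/*
 * Example (illustrative sketch, not part of this file): a simple zoned
 * target such as dm-linear typically implements its report_zones hook by
 * remapping the report start sector onto the underlying device and
 * delegating to dm_report_zones(). The struct linear_c fields and the
 * linear_map_sector() helper are assumed from dm-linear:
 *
 *	static int linear_report_zones(struct dm_target *ti,
 *			struct dm_report_zones_args *args,
 *			unsigned int nr_zones)
 *	{
 *		struct linear_c *lc = ti->private;
 *
 *		return dm_report_zones(lc->dev->bdev, lc->start,
 *				       linear_map_sector(ti, args->next_sector),
 *				       args, nr_zones);
 *	}
 */
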
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
	struct request_queue *q = md->queue;

	if (!blk_queue_is_zoned(q))
		return false;

	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
	default:
		return false;
	}
}

void dm_cleanup_zoned_dev(struct mapped_device *md)
{
	if (md->disk) {
		bitmap_free(md->disk->conv_zones_bitmap);
		md->disk->conv_zones_bitmap = NULL;
		bitmap_free(md->disk->seq_zones_wlock);
		md->disk->seq_zones_wlock = NULL;
	}

	kvfree(md->zwp_offset);
	md->zwp_offset = NULL;
	md->nr_zones = 0;
}

static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
	case BLK_ZONE_COND_CLOSED:
		return zone->wp - zone->start;
	case BLK_ZONE_COND_FULL:
		return zone->len;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
	default:
		/*
		 * Conventional, offline and read-only zones do not have a valid
		 * write pointer. Use 0 as for an empty zone.
		 */
		return 0;
	}
}

static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	struct mapped_device *md = data;
	struct gendisk *disk = md->disk;

	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!disk->conv_zones_bitmap) {
			disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones,
								GFP_NOIO);
			if (!disk->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, disk->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!disk->seq_zones_wlock) {
			disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones,
							      GFP_NOIO);
			if (!disk->seq_zones_wlock)
				return -ENOMEM;
		}
		if (!md->zwp_offset) {
			md->zwp_offset =
				kvcalloc(disk->nr_zones, sizeof(unsigned int),
					 GFP_KERNEL);
			if (!md->zwp_offset)
				return -ENOMEM;
		}
		md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);

		break;
	default:
		DMERR("Invalid zone type 0x%x at sectors %llu",
		      (int)zone->type, zone->start);
		return -ENODEV;
	}

	return 0;
}

/*
 * Revalidate the zones of a mapped device to initialize the resources
 * necessary for zone append emulation. Note that we cannot simply use the
 * block layer blk_revalidate_disk_zones() function here as the mapped device
 * is suspended (this is called from __bind() context).
 */
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
	struct gendisk *disk = md->disk;
	unsigned int noio_flag;
	int ret;

	/*
	 * Check if something changed. If yes, clean up the current resources
	 * and reallocate everything.
	 */
	if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
		dm_cleanup_zoned_dev(md);
	if (md->nr_zones)
		return 0;

	/*
	 * Scan all zones to initialize everything. Ensure that all vmalloc
	 * operations in this context are done as if GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
				     dm_zone_revalidate_cb, md);
	memalloc_noio_restore(noio_flag);
	if (ret < 0)
		goto err;
	if (ret != disk->nr_zones) {
		ret = -EIO;
		goto err;
	}

	md->nr_zones = disk->nr_zones;

	return 0;

err:
	DMERR("Revalidate zones failed %d", ret);
	dm_cleanup_zoned_dev(md);
	return ret;
}

static int device_not_zone_append_capable(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	return !bdev_is_zoned(dev->bdev);
}

static bool dm_table_supports_zone_append(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->emulate_zone_append)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
			return false;
	}

	return true;
}

int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
	struct mapped_device *md = t->md;

	/*
	 * For a zoned target, the number of zones should be updated for the
	 * correct value to be exposed in sysfs queue/nr_zones.
	 */
	WARN_ON_ONCE(queue_is_mq(q));
	md->disk->nr_zones = bdev_nr_zones(md->disk->part0);

	/* Check if zone append is natively supported */
	if (dm_table_supports_zone_append(t)) {
		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
		dm_cleanup_zoned_dev(md);
		return 0;
	}

	/*
	 * Mark the mapped device as needing zone append emulation and
	 * initialize the emulation resources once the capacity is set.
	 */
	set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
	if (!get_capacity(md->disk))
		return 0;

	return dm_revalidate_zones(md, t);
}

static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
				       void *data)
{
	unsigned int *wp_offset = data;

	*wp_offset = dm_get_zone_wp_offset(zone);

	return 0;
}

static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
				    unsigned int *wp_ofst)
{
	sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
	unsigned int noio_flag;
	struct dm_table *t;
	int srcu_idx, ret;

	t = dm_get_live_table(md, &srcu_idx);
	if (!t)
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, sector, 1,
				     dm_update_zone_wp_offset_cb, wp_ofst);
	memalloc_noio_restore(noio_flag);

	dm_put_live_table(md, srcu_idx);

	if (ret != 1)
		return -EIO;

	return 0;
}

struct orig_bio_details {
	enum req_op op;
	unsigned int nr_sectors;
};

/*
 * First phase of BIO mapping for targets with zone append emulation:
 * check all BIOs that change a zone write pointer and turn zone append
 * operations into regular write operations.
 */
static bool dm_zone_map_bio_begin(struct mapped_device *md,
				  unsigned int zno, struct bio *clone)
{
	sector_t zsectors = bdev_zone_sectors(md->disk->part0);
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/*
	 * If the target zone is in an error state, recover by inspecting the
	 * zone to get its current write pointer position. Note that since the
	 * target zone is already locked, a BIO issuing context should never
	 * see the zone write pointer offset in the DM_ZONE_UPDATING_WP_OFST
	 * state.
	 */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
		if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
			return false;
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
	}

	switch (bio_op(clone)) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
		return true;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		/* Writes must be aligned to the zone write pointer */
		if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
			return false;
		break;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Change zone append operations into non-mergeable regular
		 * writes directed at the current write pointer position of the
		 * target zone.
		 */
		clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
			(clone->bi_opf & (~REQ_OP_MASK));
		clone->bi_iter.bi_sector += zwp_offset;
		break;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return false;
	}

	/* Cannot write to a full zone */
	if (zwp_offset >= zsectors)
		return false;

	return true;
}

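/*
 * Worked example (illustration only): with a zone starting at sector S and
 * md->zwp_offset[zno] == W, dm_zone_map_bio_begin() accepts a regular write
 * only if its bi_sector modulo the zone size equals W, and it rewrites a
 * REQ_OP_ZONE_APPEND clone submitted at bi_sector == S into a
 * REQ_OP_WRITE | REQ_NOMERGE clone at bi_sector == S + W.
 */
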
/*
 * Second phase of BIO mapping for targets with zone append emulation:
 * update the zone write pointer offset array to account for the additional
 * data written to a zone. Note that at this point, the remapped clone BIO
 * may already have completed, so we do not touch it.
 */
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
					struct orig_bio_details *orig_bio_details,
					unsigned int nr_sectors)
{
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/* The clone BIO may already have been completed and failed */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
		return BLK_STS_IOERR;

	/* Update the zone wp offset */
	switch (orig_bio_details->op) {
	case REQ_OP_ZONE_RESET:
		WRITE_ONCE(md->zwp_offset[zno], 0);
		return BLK_STS_OK;
	case REQ_OP_ZONE_FINISH:
		WRITE_ONCE(md->zwp_offset[zno],
			   bdev_zone_sectors(md->disk->part0));
		return BLK_STS_OK;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Check that the target did not truncate the write operation
		 * emulating a zone append.
		 */
		if (nr_sectors != orig_bio_details->nr_sectors) {
			DMWARN_LIMIT("Truncated write for zone append");
			return BLK_STS_IOERR;
		}
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return BLK_STS_IOERR;
	}
}

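/*
 * Worked example (illustration only): an emulated zone append of 8 sectors
 * mapped while md->zwp_offset[zno] == 24 leaves the offset at 32 on success;
 * if the target truncated the emulated write (nr_sectors != the original
 * BIO size), a single contiguous append location can no longer be reported,
 * so the I/O is failed with BLK_STS_IOERR.
 */
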
static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
				struct bio *clone)
{
	if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
		return;

	wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
	bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
				  struct bio *clone)
{
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
	clear_bit_unlock(zno, disk->seq_zones_wlock);
	smp_mb__after_atomic();
	wake_up_bit(disk->seq_zones_wlock, zno);

	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static bool dm_need_zone_wp_tracking(struct bio *bio)
{
	/*
	 * Special processing is not needed for operations that do not need the
	 * zone write lock, that is, all operations that target conventional
	 * zones and all operations that do not modify directly a sequential
	 * zone write pointer.
	 */
	if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
		return false;
	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
	case REQ_OP_ZONE_APPEND:
		return bio_zone_is_seq(bio);
	default:
		return false;
	}
}

/*
 * Special IO mapping for targets needing zone append emulation.
 */
int dm_zone_map_bio(struct dm_target_io *tio)
{
	struct dm_io *io = tio->io;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = io->md;
	struct bio *clone = &tio->clone;
	struct orig_bio_details orig_bio_details;
	unsigned int zno;
	blk_status_t sts;
	int r;

	/*
	 * IOs that do not change a zone write pointer do not need
	 * any additional special processing.
	 */
	if (!dm_need_zone_wp_tracking(clone))
		return ti->type->map(ti, clone);

	/* Lock the target zone */
	zno = bio_zone_no(clone);
	dm_zone_lock(md->disk, zno, clone);

	orig_bio_details.nr_sectors = bio_sectors(clone);
	orig_bio_details.op = bio_op(clone);

	/*
	 * Check that the bio and the target zone write pointer offset are
	 * both valid, and if the bio is a zone append, remap it to a write.
	 */
	if (!dm_zone_map_bio_begin(md, zno, clone)) {
		dm_zone_unlock(md->disk, zno, clone);
		return DM_MAPIO_KILL;
	}

	/* Let the target do its work */
	r = ti->type->map(ti, clone);
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/*
		 * The target submitted the clone BIO. The target zone will
		 * be unlocked on completion of the clone.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		break;
	case DM_MAPIO_REMAPPED:
		/*
		 * The target only remapped the clone BIO. In case of error,
		 * unlock the target zone here as the clone will not be
		 * submitted.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		if (sts != BLK_STS_OK)
			dm_zone_unlock(md->disk, zno, clone);
		break;
	case DM_MAPIO_REQUEUE:
	case DM_MAPIO_KILL:
	default:
		dm_zone_unlock(md->disk, zno, clone);
		sts = BLK_STS_IOERR;
		break;
	}

	if (sts != BLK_STS_OK)
		return DM_MAPIO_KILL;

	return r;
}

/*
 * IO completion callback called from clone_endio().
 */
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
	struct mapped_device *md = io->md;
	struct gendisk *disk = md->disk;
	struct bio *orig_bio = io->orig_bio;
	unsigned int zwp_offset;
	unsigned int zno;

	/*
	 * For targets that do not emulate zone append, we only need to
	 * handle native zone-append bios.
	 */
	if (!dm_emulate_zone_append(md)) {
		/*
		 * Get the offset within the zone of the written sector
		 * and add that to the original bio sector position.
		 */
		if (clone->bi_status == BLK_STS_OK &&
		    bio_op(clone) == REQ_OP_ZONE_APPEND) {
			sector_t mask =
				(sector_t)bdev_zone_sectors(disk->part0) - 1;

			orig_bio->bi_iter.bi_sector +=
				clone->bi_iter.bi_sector & mask;
		}

		return;
	}

	/*
	 * For targets that do emulate zone append, if the clone BIO does not
	 * own the target zone write lock, we have nothing to do.
	 */
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	zno = bio_zone_no(orig_bio);

	if (clone->bi_status != BLK_STS_OK) {
		/*
		 * BIOs that modify a zone write pointer may leave the zone
		 * in an unknown state in case of failure (e.g. the write
		 * pointer was only partially advanced). In this case, set
		 * the target zone write pointer as invalid unless it is
		 * already being updated.
		 */
		WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
	} else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
		/*
		 * Get the written sector for zone append operations that were
		 * emulated using regular write operations.
		 */
		zwp_offset = READ_ONCE(md->zwp_offset[zno]);
		if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
			WRITE_ONCE(md->zwp_offset[zno],
				   DM_ZONE_INVALID_WP_OFST);
		else
			orig_bio->bi_iter.bi_sector +=
				zwp_offset - bio_sectors(orig_bio);
	}

	dm_zone_unlock(disk, zno, clone);
}
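
/*
 * Worked example (illustration only): for an emulated zone append of
 * N sectors into a zone starting at sector S, the original BIO was issued
 * with bi_sector == S. On successful completion md->zwp_offset[zno] holds
 * the post-write offset W, so bi_sector becomes S + W - N, i.e. the first
 * sector actually written, matching native REQ_OP_ZONE_APPEND semantics.
 */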