// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>

#include "dm-core.h"

#define DM_MSG_PREFIX "zone"

#define DM_ZONE_INVALID_WP_OFST UINT_MAX

/*
 * For internal zone reports bypassing the top BIO submission path.
 */
static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
				  sector_t sector, unsigned int nr_zones,
				  report_zones_cb cb, void *data)
{
	struct gendisk *disk = md->disk;
	int ret;
	struct dm_report_zones_args args = {
		.next_sector = sector,
		.orig_data = data,
		.orig_cb = cb,
	};

	do {
		struct dm_target *tgt;

		tgt = dm_table_find_target(t, args.next_sector);
		if (WARN_ON_ONCE(!tgt->type->report_zones))
			return -EIO;

		args.tgt = tgt;
		ret = tgt->type->report_zones(tgt, &args,
					      nr_zones - args.zone_idx);
		if (ret < 0)
			return ret;
	} while (args.zone_idx < nr_zones &&
		 args.next_sector < get_capacity(disk));

	return args.zone_idx;
}

/*
 * User facing dm device block device report zone operation. This calls the
 * report_zones operation for each target of a device table. This operation is
 * generally implemented by targets using dm_report_zones().
 */
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct mapped_device *md = disk->private_data;
	struct dm_table *map;
	int srcu_idx, ret;

	if (dm_suspended_md(md))
		return -EAGAIN;

	map = dm_get_live_table(md, &srcu_idx);
	if (!map)
		return -EIO;

	ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);

	dm_put_live_table(md, srcu_idx);

	return ret;
}

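/*
 * Zone report callback: remap the start sector and write pointer of the
 * reported zone from the backing device range to its position in the
 * mapped device, then forward the zone to the original callback.
 */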
static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	struct dm_report_zones_args *args = data;
	sector_t sector_diff = args->tgt->begin - args->start;

	/*
	 * Ignore zones beyond the target range.
	 */
	if (zone->start >= args->start + args->tgt->len)
		return 0;

	/*
	 * Remap the start sector and write pointer position of the zone
	 * to match its position in the target range.
	 */
	zone->start += sector_diff;
	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
		if (zone->cond == BLK_ZONE_COND_FULL)
			zone->wp = zone->start + zone->len;
		else if (zone->cond == BLK_ZONE_COND_EMPTY)
			zone->wp = zone->start;
		else
			zone->wp += sector_diff;
	}

	args->next_sector = zone->start + zone->len;
	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
}

/*
 * Helper for drivers of zoned targets to implement struct target_type
 * report_zones operation.
 */
int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
		    struct dm_report_zones_args *args, unsigned int nr_zones)
{
	/*
	 * Set the target mapping start sector first so that
	 * dm_report_zones_cb() can correctly remap zone information.
	 */
	args->start = start;

	return blkdev_report_zones(bdev, sector, nr_zones,
				   dm_report_zones_cb, args);
}
EXPORT_SYMBOL_GPL(dm_report_zones);

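/*
 * Check if a BIO is a write that modifies sectors of the zoned mapped
 * device, that is, a regular write or write zeroes operation carrying data.
 */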
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
	struct request_queue *q = md->queue;

	if (!blk_queue_is_zoned(q))
		return false;

	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
	default:
		return false;
	}
}

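/*
 * Free the resources used for zone append emulation: the conventional zone
 * bitmap, the sequential zone write lock bitmap and the zone write pointer
 * offset array.
 */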
void dm_cleanup_zoned_dev(struct mapped_device *md)
{
	if (md->disk) {
		kfree(md->disk->conv_zones_bitmap);
		md->disk->conv_zones_bitmap = NULL;
		kfree(md->disk->seq_zones_wlock);
		md->disk->seq_zones_wlock = NULL;
	}

	kvfree(md->zwp_offset);
	md->zwp_offset = NULL;
	md->nr_zones = 0;
}

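/*
 * Return the write pointer offset of a zone relative to the zone start
 * sector, as deduced from the zone condition.
 */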
static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_IMP_OPEN:
	case BLK_ZONE_COND_EXP_OPEN:
	case BLK_ZONE_COND_CLOSED:
		return zone->wp - zone->start;
	case BLK_ZONE_COND_FULL:
		return zone->len;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
	default:
		/*
		 * Conventional, offline and read-only zones do not have a valid
		 * write pointer. Use 0 as for an empty zone.
		 */
		return 0;
	}
}

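/*
 * Zone report callback used by dm_revalidate_zones() to allocate and
 * initialize the conventional zone bitmap, the sequential zone write lock
 * bitmap and the per-zone write pointer offset array.
 */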
static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	struct mapped_device *md = data;
	struct gendisk *disk = md->disk;

	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!disk->conv_zones_bitmap) {
			disk->conv_zones_bitmap =
				kcalloc(BITS_TO_LONGS(disk->nr_zones),
					sizeof(unsigned long), GFP_NOIO);
			if (!disk->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, disk->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!disk->seq_zones_wlock) {
			disk->seq_zones_wlock =
				kcalloc(BITS_TO_LONGS(disk->nr_zones),
					sizeof(unsigned long), GFP_NOIO);
			if (!disk->seq_zones_wlock)
				return -ENOMEM;
		}
		if (!md->zwp_offset) {
			md->zwp_offset =
				kvcalloc(disk->nr_zones, sizeof(unsigned int),
					 GFP_KERNEL);
			if (!md->zwp_offset)
				return -ENOMEM;
		}
		md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);

		break;
	default:
		DMERR("Invalid zone type 0x%x at sectors %llu",
		      (int)zone->type, zone->start);
		return -ENODEV;
	}

	return 0;
}

/*
 * Revalidate the zones of a mapped device to initialize the resources
 * necessary for zone append emulation. Note that we cannot simply use the
 * block layer blk_revalidate_disk_zones() function here as the mapped device
 * is suspended (this is called from __bind() context).
 */
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
{
	struct gendisk *disk = md->disk;
	unsigned int noio_flag;
	int ret;

	/*
	 * Check if something changed. If yes, clean up the current resources
	 * and reallocate everything.
	 */
	if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
		dm_cleanup_zoned_dev(md);
	if (md->nr_zones)
		return 0;

	/*
	 * Scan all zones to initialize everything. Ensure that all vmalloc
	 * operations in this context are done as if GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
				     dm_zone_revalidate_cb, md);
	memalloc_noio_restore(noio_flag);
	if (ret < 0)
		goto err;
	if (ret != disk->nr_zones) {
		ret = -EIO;
		goto err;
	}

	md->nr_zones = disk->nr_zones;

	return 0;

err:
	DMERR("Revalidate zones failed %d", ret);
	dm_cleanup_zoned_dev(md);
	return ret;
}

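/*
 * iterate_devices callback: report a device as not zone append capable if it
 * is not a zoned block device.
 */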
static int device_not_zone_append_capable(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	return !bdev_is_zoned(dev->bdev);
}

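/*
 * Check if all the targets of a table natively support zone append, that is,
 * no target requests emulation and all underlying devices are zoned.
 */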
static bool dm_table_supports_zone_append(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->emulate_zone_append)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
			return false;
	}

	return true;
}

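/*
 * Set up zone handling when binding a table to a zoned mapped device: update
 * the number of zones exposed by the device and, if zone append is not
 * natively supported by all targets, set up zone append emulation.
 */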
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
	struct mapped_device *md = t->md;

	/*
	 * For a zoned target, the number of zones should be updated for the
	 * correct value to be exposed in sysfs queue/nr_zones.
	 */
	WARN_ON_ONCE(queue_is_mq(q));
	md->disk->nr_zones = bdev_nr_zones(md->disk->part0);

	/* Check if zone append is natively supported */
	if (dm_table_supports_zone_append(t)) {
		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
		dm_cleanup_zoned_dev(md);
		return 0;
	}

	/*
	 * Mark the mapped device as needing zone append emulation and
	 * initialize the emulation resources once the capacity is set.
	 */
	set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
	if (!get_capacity(md->disk))
		return 0;

	return dm_revalidate_zones(md, t);
}

static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
				       void *data)
{
	unsigned int *wp_offset = data;

	*wp_offset = dm_get_zone_wp_offset(zone);

	return 0;
}

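/*
 * Report a single zone of the mapped device to recover its current write
 * pointer offset, e.g. after a failed write left the cached offset invalid.
 */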
static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
				    unsigned int *wp_ofst)
{
	sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
	unsigned int noio_flag;
	struct dm_table *t;
	int srcu_idx, ret;

	t = dm_get_live_table(md, &srcu_idx);
	if (!t)
		return -EIO;

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = dm_blk_do_report_zones(md, t, sector, 1,
				     dm_update_zone_wp_offset_cb, wp_ofst);
	memalloc_noio_restore(noio_flag);

	dm_put_live_table(md, srcu_idx);

	if (ret != 1)
		return -EIO;

	return 0;
}

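/*
 * Operation and size of the original BIO, saved before the target map
 * function is called so that dm_zone_map_bio_end() can update the zone
 * write pointer offset accordingly.
 */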
struct orig_bio_details {
	enum req_op op;
	unsigned int nr_sectors;
};

/*
 * First phase of BIO mapping for targets with zone append emulation:
 * check all BIOs that change a zone write pointer and change zone
 * append operations into regular write operations.
 */
static bool dm_zone_map_bio_begin(struct mapped_device *md,
				  unsigned int zno, struct bio *clone)
{
	sector_t zsectors = bdev_zone_sectors(md->disk->part0);
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/*
	 * If the target zone is in an error state, recover by inspecting the
	 * zone to get its current write pointer position. Note that since the
	 * target zone is already locked, a BIO issuing context should never
	 * see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
	 */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
		if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
			return false;
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
	}

	switch (bio_op(clone)) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
		return true;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		/* Writes must be aligned to the zone write pointer */
		if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
			return false;
		break;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Change zone append operations into non-mergeable regular
		 * writes directed at the current write pointer position of the
		 * target zone.
		 */
		clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
			(clone->bi_opf & (~REQ_OP_MASK));
		clone->bi_iter.bi_sector += zwp_offset;
		break;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return false;
	}

	/* Cannot write to a full zone */
	if (zwp_offset >= zsectors)
		return false;

	return true;
}

/*
 * Second phase of BIO mapping for targets with zone append emulation:
 * update the zone write pointer offset array to account for the additional
 * data written to a zone. Note that at this point, the remapped clone BIO
 * may already have completed, so we do not touch it.
 */
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
					struct orig_bio_details *orig_bio_details,
					unsigned int nr_sectors)
{
	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);

	/* The clone BIO may already have been completed and failed */
	if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
		return BLK_STS_IOERR;

	/* Update the zone wp offset */
	switch (orig_bio_details->op) {
	case REQ_OP_ZONE_RESET:
		WRITE_ONCE(md->zwp_offset[zno], 0);
		return BLK_STS_OK;
	case REQ_OP_ZONE_FINISH:
		WRITE_ONCE(md->zwp_offset[zno],
			   bdev_zone_sectors(md->disk->part0));
		return BLK_STS_OK;
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	case REQ_OP_ZONE_APPEND:
		/*
		 * Check that the target did not truncate the write operation
		 * emulating a zone append.
		 */
		if (nr_sectors != orig_bio_details->nr_sectors) {
			DMWARN_LIMIT("Truncated write for zone append");
			return BLK_STS_IOERR;
		}
		WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
		return BLK_STS_OK;
	default:
		DMWARN_LIMIT("Invalid BIO operation");
		return BLK_STS_IOERR;
	}
}

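/*
 * Take the write lock of the clone BIO target zone and flag the clone as
 * owning the lock so that it can be released on completion.
 */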
static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
				struct bio *clone)
{
	if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
		return;

	wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
	bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

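/*
 * Release the write lock of the clone BIO target zone, if the clone owns it,
 * and wake up any waiter.
 */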
static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
				  struct bio *clone)
{
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
	clear_bit_unlock(zno, disk->seq_zones_wlock);
	smp_mb__after_atomic();
	wake_up_bit(disk->seq_zones_wlock, zno);

	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
}

static bool dm_need_zone_wp_tracking(struct bio *bio)
{
	/*
	 * Special processing is not needed for operations that do not need the
	 * zone write lock, that is, all operations that target conventional
	 * zones and all operations that do not directly modify a sequential
	 * zone write pointer.
	 */
	if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
		return false;
	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_FINISH:
	case REQ_OP_ZONE_APPEND:
		return bio_zone_is_seq(bio);
	default:
		return false;
	}
}

/*
 * Special IO mapping for targets needing zone append emulation.
 */
int dm_zone_map_bio(struct dm_target_io *tio)
{
	struct dm_io *io = tio->io;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = io->md;
	struct bio *clone = &tio->clone;
	struct orig_bio_details orig_bio_details;
	unsigned int zno;
	blk_status_t sts;
	int r;

	/*
	 * IOs that do not change a zone write pointer do not need
	 * any additional special processing.
	 */
	if (!dm_need_zone_wp_tracking(clone))
		return ti->type->map(ti, clone);

	/* Lock the target zone */
	zno = bio_zone_no(clone);
	dm_zone_lock(md->disk, zno, clone);

	orig_bio_details.nr_sectors = bio_sectors(clone);
	orig_bio_details.op = bio_op(clone);

	/*
	 * Check that the bio and the target zone write pointer offset are
	 * both valid, and if the bio is a zone append, remap it to a write.
	 */
	if (!dm_zone_map_bio_begin(md, zno, clone)) {
		dm_zone_unlock(md->disk, zno, clone);
		return DM_MAPIO_KILL;
	}

	/* Let the target do its work */
	r = ti->type->map(ti, clone);
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/*
		 * The target submitted the clone BIO. The target zone will
		 * be unlocked on completion of the clone.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		break;
	case DM_MAPIO_REMAPPED:
		/*
		 * The target only remapped the clone BIO. In case of error,
		 * unlock the target zone here as the clone will not be
		 * submitted.
		 */
		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
					  *tio->len_ptr);
		if (sts != BLK_STS_OK)
			dm_zone_unlock(md->disk, zno, clone);
		break;
	case DM_MAPIO_REQUEUE:
	case DM_MAPIO_KILL:
	default:
		dm_zone_unlock(md->disk, zno, clone);
		sts = BLK_STS_IOERR;
		break;
	}

	if (sts != BLK_STS_OK)
		return DM_MAPIO_KILL;

	return r;
}

/*
 * IO completion callback called from clone_endio().
 */
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
	struct mapped_device *md = io->md;
	struct gendisk *disk = md->disk;
	struct bio *orig_bio = io->orig_bio;
	unsigned int zwp_offset;
	unsigned int zno;

	/*
	 * For targets that do not emulate zone append, we only need to
	 * handle native zone-append bios.
	 */
	if (!dm_emulate_zone_append(md)) {
		/*
		 * Get the offset within the zone of the written sector
		 * and add that to the original bio sector position.
		 */
		if (clone->bi_status == BLK_STS_OK &&
		    bio_op(clone) == REQ_OP_ZONE_APPEND) {
			sector_t mask =
				(sector_t)bdev_zone_sectors(disk->part0) - 1;

			orig_bio->bi_iter.bi_sector +=
				clone->bi_iter.bi_sector & mask;
		}

		return;
	}

	/*
	 * For targets that do emulate zone append, if the clone BIO does not
	 * own the target zone write lock, we have nothing to do.
	 */
	if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
		return;

	zno = bio_zone_no(orig_bio);

	if (clone->bi_status != BLK_STS_OK) {
		/*
		 * BIOs that modify a zone write pointer may leave the zone
		 * in an unknown state in case of failure (e.g. the write
		 * pointer was only partially advanced). In this case, set
		 * the target zone write pointer as invalid unless it is
		 * already being updated.
		 */
		WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
	} else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
		/*
		 * Get the written sector for zone append operations that were
		 * emulated using regular write operations.
		 */
		zwp_offset = READ_ONCE(md->zwp_offset[zno]);
		if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
			WRITE_ONCE(md->zwp_offset[zno],
				   DM_ZONE_INVALID_WP_OFST);
		else
			orig_bio->bi_iter.bi_sector +=
				zwp_offset - bio_sectors(orig_bio);
	}

	dm_zone_unlock(disk, zno, clone);
}
582 | ||
583 | /* | |
584 | * IO completion callback called from clone_endio(). | |
585 | */ | |
586 | void dm_zone_endio(struct dm_io *io, struct bio *clone) | |
587 | { | |
588 | struct mapped_device *md = io->md; | |
de71973c | 589 | struct gendisk *disk = md->disk; |
bb37d772 DLM |
590 | struct bio *orig_bio = io->orig_bio; |
591 | unsigned int zwp_offset; | |
592 | unsigned int zno; | |
593 | ||
594 | /* | |
595 | * For targets that do not emulate zone append, we only need to | |
596 | * handle native zone-append bios. | |
597 | */ | |
598 | if (!dm_emulate_zone_append(md)) { | |
599 | /* | |
600 | * Get the offset within the zone of the written sector | |
601 | * and add that to the original bio sector position. | |
602 | */ | |
603 | if (clone->bi_status == BLK_STS_OK && | |
604 | bio_op(clone) == REQ_OP_ZONE_APPEND) { | |
de71973c CH |
605 | sector_t mask = |
606 | (sector_t)bdev_zone_sectors(disk->part0) - 1; | |
bb37d772 DLM |
607 | |
608 | orig_bio->bi_iter.bi_sector += | |
609 | clone->bi_iter.bi_sector & mask; | |
610 | } | |
611 | ||
612 | return; | |
613 | } | |
614 | ||
615 | /* | |
616 | * For targets that do emulate zone append, if the clone BIO does not | |
617 | * own the target zone write lock, we have nothing to do. | |
618 | */ | |
619 | if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)) | |
620 | return; | |
621 | ||
622 | zno = bio_zone_no(orig_bio); | |
623 | ||
624 | if (clone->bi_status != BLK_STS_OK) { | |
625 | /* | |
626 | * BIOs that modify a zone write pointer may leave the zone | |
627 | * in an unknown state in case of failure (e.g. the write | |
628 | * pointer was only partially advanced). In this case, set | |
629 | * the target zone write pointer as invalid unless it is | |
630 | * already being updated. | |
631 | */ | |
632 | WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST); | |
633 | } else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) { | |
634 | /* | |
635 | * Get the written sector for zone append operation that were | |
636 | * emulated using regular write operations. | |
637 | */ | |
638 | zwp_offset = READ_ONCE(md->zwp_offset[zno]); | |
639 | if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio))) | |
640 | WRITE_ONCE(md->zwp_offset[zno], | |
641 | DM_ZONE_INVALID_WP_OFST); | |
642 | else | |
643 | orig_bio->bi_iter.bi_sector += | |
644 | zwp_offset - bio_sectors(orig_bio); | |
645 | } | |
646 | ||
d86e716a | 647 | dm_zone_unlock(disk, zno, clone); |
7fc18728 | 648 | } |