Commit | Line | Data |
---|---|---|
a98c5b19 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
89d94756 HR |
2 | /* |
3 | * SCSI Zoned Block commands | |
4 | * | |
5 | * Copyright (C) 2014-2015 SUSE Linux GmbH | |
6 | * Written by: Hannes Reinecke <hare@suse.de> | |
7 | * Modified by: Damien Le Moal <damien.lemoal@hgst.com> | |
8 | * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com> | |
89d94756 HR |
9 | */ |
10 | ||
11 | #include <linux/blkdev.h> | |
b091ac61 DLM |
12 | #include <linux/vmalloc.h> |
13 | #include <linux/sched/mm.h> | |
5795eb44 | 14 | #include <linux/mutex.h> |
89d94756 HR |
15 | |
16 | #include <asm/unaligned.h> | |
17 | ||
18 | #include <scsi/scsi.h> | |
19 | #include <scsi/scsi_cmnd.h> | |
89d94756 HR |
20 | |
21 | #include "sd.h" | |
89d94756 | 22 | |
5795eb44 JT |
23 | static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone) |
24 | { | |
25 | if (zone->type == ZBC_ZONE_TYPE_CONV) | |
26 | return 0; | |
27 | ||
28 | switch (zone->cond) { | |
29 | case BLK_ZONE_COND_IMP_OPEN: | |
30 | case BLK_ZONE_COND_EXP_OPEN: | |
31 | case BLK_ZONE_COND_CLOSED: | |
32 | return zone->wp - zone->start; | |
33 | case BLK_ZONE_COND_FULL: | |
34 | return zone->len; | |
35 | case BLK_ZONE_COND_EMPTY: | |
36 | case BLK_ZONE_COND_OFFLINE: | |
37 | case BLK_ZONE_COND_READONLY: | |
38 | default: | |
39 | /* | |
40 | * Offline and read-only zones do not have a valid | |
41 | * write pointer. Use 0 as for an empty zone. | |
42 | */ | |
43 | return 0; | |
44 | } | |
45 | } | |
46 | ||
d4100351 CH |
47 | static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, |
48 | unsigned int idx, report_zones_cb cb, void *data) | |
89d94756 HR |
49 | { |
50 | struct scsi_device *sdp = sdkp->device; | |
d4100351 | 51 | struct blk_zone zone = { 0 }; |
5795eb44 | 52 | int ret; |
89d94756 | 53 | |
d4100351 CH |
54 | zone.type = buf[0] & 0x0f; |
55 | zone.cond = (buf[1] >> 4) & 0xf; | |
89d94756 | 56 | if (buf[1] & 0x01) |
d4100351 | 57 | zone.reset = 1; |
89d94756 | 58 | if (buf[1] & 0x02) |
d4100351 CH |
59 | zone.non_seq = 1; |
60 | ||
61 | zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); | |
82394db7 | 62 | zone.capacity = zone.len; |
d4100351 | 63 | zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16])); |
13202ebf | 64 | if (zone.cond == ZBC_ZONE_COND_FULL) |
d4100351 | 65 | zone.wp = zone.start + zone.len; |
bf3f120f NC |
66 | else |
67 | zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); | |
d4100351 | 68 | |
5795eb44 JT |
69 | ret = cb(&zone, idx, data); |
70 | if (ret) | |
71 | return ret; | |
72 | ||
73 | if (sdkp->rev_wp_offset) | |
74 | sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone); | |
75 | ||
76 | return 0; | |
89d94756 HR |
77 | } |
78 | ||
79 | /** | |
e76239a3 | 80 | * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. |
e98f42bc | 81 | * @sdkp: The target disk |
b091ac61 | 82 | * @buf: vmalloc-ed buffer to use for the reply |
e98f42bc DLM |
83 | * @buflen: the buffer size |
84 | * @lba: Start LBA of the report | |
d2e428e4 | 85 | * @partial: Do partial report |
e98f42bc DLM |
86 | * |
87 | * For internal use during device validation. | |
d2e428e4 DLM |
88 | * Using partial=true can significantly speed up execution of a report zones |
89 | * command because the disk does not have to count all possible report matching | |
90 | * zones and will only report the count of zones fitting in the command reply | |
91 | * buffer. | |
89d94756 | 92 | */ |
e76239a3 CH |
93 | static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, |
94 | unsigned int buflen, sector_t lba, | |
95 | bool partial) | |
89d94756 HR |
96 | { |
97 | struct scsi_device *sdp = sdkp->device; | |
98 | const int timeout = sdp->request_queue->rq_timeout; | |
99 | struct scsi_sense_hdr sshdr; | |
100 | unsigned char cmd[16]; | |
101 | unsigned int rep_len; | |
102 | int result; | |
103 | ||
104 | memset(cmd, 0, 16); | |
105 | cmd[0] = ZBC_IN; | |
106 | cmd[1] = ZI_REPORT_ZONES; | |
107 | put_unaligned_be64(lba, &cmd[2]); | |
108 | put_unaligned_be32(buflen, &cmd[10]); | |
d2e428e4 DLM |
109 | if (partial) |
110 | cmd[14] = ZBC_REPORT_ZONE_PARTIAL; | |
89d94756 HR |
111 | |
112 | result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, | |
113 | buf, buflen, &sshdr, | |
114 | timeout, SD_MAX_RETRIES, NULL); | |
115 | if (result) { | |
116 | sd_printk(KERN_ERR, sdkp, | |
a35989a0 DLM |
117 | "REPORT ZONES start lba %llu failed\n", lba); |
118 | sd_print_result(sdkp, "REPORT ZONES", result); | |
464a00c9 | 119 | if (result > 0 && scsi_sense_valid(&sshdr)) |
a35989a0 | 120 | sd_print_sense_hdr(sdkp, &sshdr); |
89d94756 HR |
121 | return -EIO; |
122 | } | |
123 | ||
124 | rep_len = get_unaligned_be32(&buf[0]); | |
125 | if (rep_len < 64) { | |
126 | sd_printk(KERN_ERR, sdkp, | |
127 | "REPORT ZONES report invalid length %u\n", | |
128 | rep_len); | |
129 | return -EIO; | |
130 | } | |
131 | ||
132 | return 0; | |
133 | } | |
134 | ||
b091ac61 | 135 | /** |
59863cb5 | 136 | * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply. |
b091ac61 DLM |
137 | * @sdkp: The target disk |
138 | * @nr_zones: Maximum number of zones to report | |
139 | * @buflen: Size of the buffer allocated | |
140 | * | |
141 | * Try to allocate a reply buffer for the number of requested zones. | |
142 | * The size of the buffer allocated may be smaller than requested to | |
143 | * satify the device constraint (max_hw_sectors, max_segments, etc). | |
144 | * | |
145 | * Return the address of the allocated buffer and update @buflen with | |
146 | * the size of the allocated buffer. | |
147 | */ | |
148 | static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp, | |
149 | unsigned int nr_zones, size_t *buflen) | |
150 | { | |
151 | struct request_queue *q = sdkp->disk->queue; | |
152 | size_t bufsize; | |
153 | void *buf; | |
154 | ||
155 | /* | |
156 | * Report zone buffer size should be at most 64B times the number of | |
7215e909 NA |
157 | * zones requested plus the 64B reply header, but should be aligned |
158 | * to SECTOR_SIZE for ATA devices. | |
b091ac61 DLM |
159 | * Make sure that this size does not exceed the hardware capabilities. |
160 | * Furthermore, since the report zone command cannot be split, make | |
161 | * sure that the allocated buffer can always be mapped by limiting the | |
162 | * number of pages allocated to the HBA max segments limit. | |
163 | */ | |
23a50861 DLM |
164 | nr_zones = min(nr_zones, sdkp->nr_zones); |
165 | bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE); | |
b091ac61 DLM |
166 | bufsize = min_t(size_t, bufsize, |
167 | queue_max_hw_sectors(q) << SECTOR_SHIFT); | |
168 | bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT); | |
169 | ||
23a50861 DLM |
170 | while (bufsize >= SECTOR_SIZE) { |
171 | buf = __vmalloc(bufsize, | |
88dca4ca | 172 | GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY); |
23a50861 DLM |
173 | if (buf) { |
174 | *buflen = bufsize; | |
175 | return buf; | |
176 | } | |
7215e909 | 177 | bufsize = rounddown(bufsize >> 1, SECTOR_SIZE); |
23a50861 | 178 | } |
b091ac61 | 179 | |
23a50861 | 180 | return NULL; |
b091ac61 DLM |
181 | } |
182 | ||
e98f42bc | 183 | /** |
d4100351 CH |
184 | * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors. |
185 | * @sdkp: The target disk | |
e98f42bc | 186 | */ |
d4100351 CH |
187 | static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp) |
188 | { | |
189 | return logical_to_sectors(sdkp->device, sdkp->zone_blocks); | |
190 | } | |
191 | ||
e76239a3 | 192 | int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, |
d4100351 | 193 | unsigned int nr_zones, report_zones_cb cb, void *data) |
89d94756 | 194 | { |
e76239a3 | 195 | struct scsi_disk *sdkp = scsi_disk(disk); |
51fdaa04 | 196 | sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity); |
d4100351 | 197 | unsigned int nr, i; |
e76239a3 | 198 | unsigned char *buf; |
d4100351 CH |
199 | size_t offset, buflen = 0; |
200 | int zone_idx = 0; | |
201 | int ret; | |
89d94756 HR |
202 | |
203 | if (!sd_is_zoned(sdkp)) | |
204 | /* Not a zoned device */ | |
e76239a3 | 205 | return -EOPNOTSUPP; |
89d94756 | 206 | |
51fdaa04 DLM |
207 | if (!capacity) |
208 | /* Device gone or invalid */ | |
209 | return -ENODEV; | |
210 | ||
d4100351 | 211 | buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen); |
e76239a3 CH |
212 | if (!buf) |
213 | return -ENOMEM; | |
89d94756 | 214 | |
51fdaa04 | 215 | while (zone_idx < nr_zones && sector < capacity) { |
d4100351 CH |
216 | ret = sd_zbc_do_report_zones(sdkp, buf, buflen, |
217 | sectors_to_logical(sdkp->device, sector), true); | |
218 | if (ret) | |
219 | goto out; | |
220 | ||
221 | offset = 0; | |
222 | nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64); | |
223 | if (!nr) | |
224 | break; | |
225 | ||
226 | for (i = 0; i < nr && zone_idx < nr_zones; i++) { | |
227 | offset += 64; | |
228 | ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx, | |
229 | cb, data); | |
230 | if (ret) | |
231 | goto out; | |
232 | zone_idx++; | |
233 | } | |
89d94756 | 234 | |
d4100351 | 235 | sector += sd_zbc_zone_sectors(sdkp) * i; |
e76239a3 | 236 | } |
89d94756 | 237 | |
d4100351 | 238 | ret = zone_idx; |
b091ac61 DLM |
239 | out: |
240 | kvfree(buf); | |
e76239a3 | 241 | return ret; |
89d94756 HR |
242 | } |
243 | ||
02494d35 JT |
244 | static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd) |
245 | { | |
5999ccff | 246 | struct request *rq = scsi_cmd_to_rq(cmd); |
f3fa33ac | 247 | struct scsi_disk *sdkp = scsi_disk(rq->q->disk); |
02494d35 JT |
248 | sector_t sector = blk_rq_pos(rq); |
249 | ||
250 | if (!sd_is_zoned(sdkp)) | |
251 | /* Not a zoned device */ | |
252 | return BLK_STS_IOERR; | |
253 | ||
254 | if (sdkp->device->changed) | |
255 | return BLK_STS_IOERR; | |
256 | ||
257 | if (sector & (sd_zbc_zone_sectors(sdkp) - 1)) | |
258 | /* Unaligned request */ | |
259 | return BLK_STS_IOERR; | |
260 | ||
261 | return BLK_STS_OK; | |
262 | } | |
263 | ||
5795eb44 JT |
264 | #define SD_ZBC_INVALID_WP_OFST (~0u) |
265 | #define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1) | |
266 | ||
267 | static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx, | |
268 | void *data) | |
269 | { | |
270 | struct scsi_disk *sdkp = data; | |
271 | ||
272 | lockdep_assert_held(&sdkp->zones_wp_offset_lock); | |
273 | ||
274 | sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone); | |
275 | ||
276 | return 0; | |
277 | } | |
278 | ||
279 | static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) | |
280 | { | |
281 | struct scsi_disk *sdkp; | |
2db4215f | 282 | unsigned long flags; |
1d479e6c | 283 | sector_t zno; |
5795eb44 JT |
284 | int ret; |
285 | ||
286 | sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); | |
287 | ||
2db4215f | 288 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
289 | for (zno = 0; zno < sdkp->nr_zones; zno++) { |
290 | if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) | |
291 | continue; | |
292 | ||
2db4215f | 293 | spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
294 | ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf, |
295 | SD_BUF_SIZE, | |
296 | zno * sdkp->zone_blocks, true); | |
2db4215f | 297 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
298 | if (!ret) |
299 | sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64, | |
300 | zno, sd_zbc_update_wp_offset_cb, | |
301 | sdkp); | |
302 | } | |
2db4215f | 303 | spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
304 | |
305 | scsi_device_put(sdkp->device); | |
306 | } | |
307 | ||
308 | /** | |
309 | * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command. | |
310 | * @cmd: the command to setup | |
311 | * @lba: the LBA to patch | |
312 | * @nr_blocks: the number of LBAs to be written | |
313 | * | |
314 | * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND. | |
315 | * @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and | |
316 | * patching of the lba for an emulated ZONE_APPEND command. | |
317 | * | |
318 | * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will | |
319 | * schedule a REPORT ZONES command and return BLK_STS_IOERR. | |
320 | */ | |
321 | blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, | |
322 | unsigned int nr_blocks) | |
323 | { | |
5999ccff | 324 | struct request *rq = scsi_cmd_to_rq(cmd); |
f3fa33ac | 325 | struct scsi_disk *sdkp = scsi_disk(rq->q->disk); |
5795eb44 | 326 | unsigned int wp_offset, zno = blk_rq_zone_no(rq); |
2db4215f | 327 | unsigned long flags; |
5795eb44 JT |
328 | blk_status_t ret; |
329 | ||
330 | ret = sd_zbc_cmnd_checks(cmd); | |
331 | if (ret != BLK_STS_OK) | |
332 | return ret; | |
333 | ||
334 | if (!blk_rq_zone_is_seq(rq)) | |
335 | return BLK_STS_IOERR; | |
336 | ||
337 | /* Unlock of the write lock will happen in sd_zbc_complete() */ | |
338 | if (!blk_req_zone_write_trylock(rq)) | |
339 | return BLK_STS_ZONE_RESOURCE; | |
340 | ||
2db4215f | 341 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
342 | wp_offset = sdkp->zones_wp_offset[zno]; |
343 | switch (wp_offset) { | |
344 | case SD_ZBC_INVALID_WP_OFST: | |
345 | /* | |
346 | * We are about to schedule work to update a zone write pointer | |
347 | * offset, which will cause the zone append command to be | |
348 | * requeued. So make sure that the scsi device does not go away | |
349 | * while the work is being processed. | |
350 | */ | |
351 | if (scsi_device_get(sdkp->device)) { | |
352 | ret = BLK_STS_IOERR; | |
353 | break; | |
354 | } | |
355 | sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST; | |
356 | schedule_work(&sdkp->zone_wp_offset_work); | |
357 | fallthrough; | |
358 | case SD_ZBC_UPDATING_WP_OFST: | |
359 | ret = BLK_STS_DEV_RESOURCE; | |
360 | break; | |
361 | default: | |
362 | wp_offset = sectors_to_logical(sdkp->device, wp_offset); | |
363 | if (wp_offset + nr_blocks > sdkp->zone_blocks) { | |
364 | ret = BLK_STS_IOERR; | |
365 | break; | |
366 | } | |
367 | ||
368 | *lba += wp_offset; | |
369 | } | |
2db4215f | 370 | spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
371 | if (ret) |
372 | blk_req_zone_write_unlock(rq); | |
373 | return ret; | |
374 | } | |
375 | ||
e98f42bc | 376 | /** |
ad512f20 AJ |
377 | * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations |
378 | * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH. | |
e98f42bc | 379 | * @cmd: the command to setup |
ad512f20 AJ |
380 | * @op: Operation to be performed |
381 | * @all: All zones control | |
e98f42bc | 382 | * |
ad512f20 AJ |
383 | * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL, |
384 | * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests. | |
e98f42bc | 385 | */ |
ad512f20 AJ |
386 | blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, |
387 | unsigned char op, bool all) | |
89d94756 | 388 | { |
5999ccff | 389 | struct request *rq = scsi_cmd_to_rq(cmd); |
89d94756 | 390 | sector_t sector = blk_rq_pos(rq); |
f3fa33ac | 391 | struct scsi_disk *sdkp = scsi_disk(rq->q->disk); |
89d94756 | 392 | sector_t block = sectors_to_logical(sdkp->device, sector); |
02494d35 | 393 | blk_status_t ret; |
89d94756 | 394 | |
02494d35 JT |
395 | ret = sd_zbc_cmnd_checks(cmd); |
396 | if (ret != BLK_STS_OK) | |
397 | return ret; | |
89d94756 | 398 | |
89d94756 HR |
399 | cmd->cmd_len = 16; |
400 | memset(cmd->cmnd, 0, cmd->cmd_len); | |
401 | cmd->cmnd[0] = ZBC_OUT; | |
ad512f20 | 402 | cmd->cmnd[1] = op; |
d81e9d49 CK |
403 | if (all) |
404 | cmd->cmnd[14] = 0x1; | |
405 | else | |
406 | put_unaligned_be64(block, &cmd->cmnd[2]); | |
89d94756 HR |
407 | |
408 | rq->timeout = SD_TIMEOUT; | |
409 | cmd->sc_data_direction = DMA_NONE; | |
410 | cmd->transfersize = 0; | |
411 | cmd->allowed = 0; | |
412 | ||
159b2cbf | 413 | return BLK_STS_OK; |
89d94756 HR |
414 | } |
415 | ||
5795eb44 JT |
416 | static bool sd_zbc_need_zone_wp_update(struct request *rq) |
417 | { | |
418 | switch (req_op(rq)) { | |
419 | case REQ_OP_ZONE_APPEND: | |
420 | case REQ_OP_ZONE_FINISH: | |
421 | case REQ_OP_ZONE_RESET: | |
422 | case REQ_OP_ZONE_RESET_ALL: | |
423 | return true; | |
424 | case REQ_OP_WRITE: | |
425 | case REQ_OP_WRITE_ZEROES: | |
426 | case REQ_OP_WRITE_SAME: | |
427 | return blk_rq_zone_is_seq(rq); | |
428 | default: | |
429 | return false; | |
430 | } | |
431 | } | |
432 | ||
433 | /** | |
434 | * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion | |
435 | * @cmd: Completed command | |
436 | * @good_bytes: Command reply bytes | |
437 | * | |
438 | * Called from sd_zbc_complete() to handle the update of the cached zone write | |
439 | * pointer value in case an update is needed. | |
440 | */ | |
441 | static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, | |
442 | unsigned int good_bytes) | |
443 | { | |
444 | int result = cmd->result; | |
5999ccff | 445 | struct request *rq = scsi_cmd_to_rq(cmd); |
f3fa33ac | 446 | struct scsi_disk *sdkp = scsi_disk(rq->q->disk); |
5795eb44 JT |
447 | unsigned int zno = blk_rq_zone_no(rq); |
448 | enum req_opf op = req_op(rq); | |
2db4215f | 449 | unsigned long flags; |
5795eb44 JT |
450 | |
451 | /* | |
452 | * If we got an error for a command that needs updating the write | |
453 | * pointer offset cache, we must mark the zone wp offset entry as | |
454 | * invalid to force an update from disk the next time a zone append | |
455 | * command is issued. | |
456 | */ | |
2db4215f | 457 | spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
458 | |
459 | if (result && op != REQ_OP_ZONE_RESET_ALL) { | |
460 | if (op == REQ_OP_ZONE_APPEND) { | |
461 | /* Force complete completion (no retry) */ | |
462 | good_bytes = 0; | |
463 | scsi_set_resid(cmd, blk_rq_bytes(rq)); | |
464 | } | |
465 | ||
466 | /* | |
467 | * Force an update of the zone write pointer offset on | |
468 | * the next zone append access. | |
469 | */ | |
470 | if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) | |
471 | sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST; | |
472 | goto unlock_wp_offset; | |
473 | } | |
474 | ||
475 | switch (op) { | |
476 | case REQ_OP_ZONE_APPEND: | |
477 | rq->__sector += sdkp->zones_wp_offset[zno]; | |
478 | fallthrough; | |
479 | case REQ_OP_WRITE_ZEROES: | |
480 | case REQ_OP_WRITE_SAME: | |
481 | case REQ_OP_WRITE: | |
482 | if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp)) | |
483 | sdkp->zones_wp_offset[zno] += | |
484 | good_bytes >> SECTOR_SHIFT; | |
485 | break; | |
486 | case REQ_OP_ZONE_RESET: | |
487 | sdkp->zones_wp_offset[zno] = 0; | |
488 | break; | |
489 | case REQ_OP_ZONE_FINISH: | |
490 | sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp); | |
491 | break; | |
492 | case REQ_OP_ZONE_RESET_ALL: | |
493 | memset(sdkp->zones_wp_offset, 0, | |
494 | sdkp->nr_zones * sizeof(unsigned int)); | |
495 | break; | |
496 | default: | |
497 | break; | |
498 | } | |
499 | ||
500 | unlock_wp_offset: | |
2db4215f | 501 | spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); |
5795eb44 JT |
502 | |
503 | return good_bytes; | |
504 | } | |
505 | ||
e98f42bc DLM |
506 | /** |
507 | * sd_zbc_complete - ZBC command post processing. | |
508 | * @cmd: Completed command | |
509 | * @good_bytes: Command reply bytes | |
510 | * @sshdr: command sense header | |
511 | * | |
5795eb44 JT |
512 | * Called from sd_done() to handle zone commands errors and updates to the |
513 | * device queue zone write pointer offset cahce. | |
e98f42bc | 514 | */ |
5795eb44 | 515 | unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, |
89d94756 HR |
516 | struct scsi_sense_hdr *sshdr) |
517 | { | |
518 | int result = cmd->result; | |
5999ccff | 519 | struct request *rq = scsi_cmd_to_rq(cmd); |
89d94756 | 520 | |
ad512f20 | 521 | if (op_is_zone_mgmt(req_op(rq)) && |
edc1f543 DLM |
522 | result && |
523 | sshdr->sense_key == ILLEGAL_REQUEST && | |
524 | sshdr->asc == 0x24) { | |
525 | /* | |
ad512f20 AJ |
526 | * INVALID FIELD IN CDB error: a zone management command was |
527 | * attempted on a conventional zone. Nothing to worry about, | |
528 | * so be quiet about the error. | |
edc1f543 DLM |
529 | */ |
530 | rq->rq_flags |= RQF_QUIET; | |
5795eb44 JT |
531 | } else if (sd_zbc_need_zone_wp_update(rq)) |
532 | good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes); | |
533 | ||
534 | if (req_op(rq) == REQ_OP_ZONE_APPEND) | |
535 | blk_req_zone_write_unlock(rq); | |
536 | ||
537 | return good_bytes; | |
89d94756 HR |
538 | } |
539 | ||
540 | /** | |
7f9d35d2 | 541 | * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics |
e98f42bc DLM |
542 | * @sdkp: Target disk |
543 | * @buf: Buffer where to store the VPD page data | |
544 | * | |
7f9d35d2 | 545 | * Read VPD page B6, get information and check that reads are unconstrained. |
89d94756 | 546 | */ |
7f9d35d2 DLM |
547 | static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, |
548 | unsigned char *buf) | |
89d94756 HR |
549 | { |
550 | ||
551 | if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) { | |
552 | sd_printk(KERN_NOTICE, sdkp, | |
7f9d35d2 | 553 | "Read zoned characteristics VPD page failed\n"); |
89d94756 HR |
554 | return -ENODEV; |
555 | } | |
556 | ||
557 | if (sdkp->device->type != TYPE_ZBC) { | |
558 | /* Host-aware */ | |
559 | sdkp->urswrz = 1; | |
4a109032 DLM |
560 | sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]); |
561 | sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]); | |
89d94756 HR |
562 | sdkp->zones_max_open = 0; |
563 | } else { | |
564 | /* Host-managed */ | |
565 | sdkp->urswrz = buf[4] & 1; | |
566 | sdkp->zones_optimal_open = 0; | |
567 | sdkp->zones_optimal_nonseq = 0; | |
4a109032 | 568 | sdkp->zones_max_open = get_unaligned_be32(&buf[16]); |
89d94756 HR |
569 | } |
570 | ||
7f9d35d2 DLM |
571 | /* |
572 | * Check for unconstrained reads: host-managed devices with | |
573 | * constrained reads (drives failing read after write pointer) | |
574 | * are not supported. | |
575 | */ | |
576 | if (!sdkp->urswrz) { | |
577 | if (sdkp->first_scan) | |
578 | sd_printk(KERN_NOTICE, sdkp, | |
579 | "constrained reads devices are not supported\n"); | |
580 | return -ENODEV; | |
581 | } | |
582 | ||
89d94756 HR |
583 | return 0; |
584 | } | |
585 | ||
e98f42bc | 586 | /** |
dbfc5626 | 587 | * sd_zbc_check_capacity - Check the device capacity |
e98f42bc | 588 | * @sdkp: Target disk |
dbfc5626 | 589 | * @buf: command buffer |
8df513da | 590 | * @zblocks: zone size in number of blocks |
e98f42bc | 591 | * |
dbfc5626 DLM |
592 | * Get the device zone size and check that the device capacity as reported |
593 | * by READ CAPACITY matches the max_lba value (plus one) of the report zones | |
594 | * command reply for devices with RC_BASIS == 0. | |
ccce20fc | 595 | * |
dbfc5626 | 596 | * Returns 0 upon success or an error code upon failure. |
e98f42bc | 597 | */ |
dbfc5626 DLM |
598 | static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf, |
599 | u32 *zblocks) | |
89d94756 | 600 | { |
dbfc5626 | 601 | u64 zone_blocks; |
d9dd7308 | 602 | sector_t max_lba; |
89d94756 | 603 | unsigned char *rec; |
5f832a39 | 604 | int ret; |
b091ac61 | 605 | |
d9dd7308 DLM |
606 | /* Do a report zone to get max_lba and the size of the first zone */ |
607 | ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false); | |
4b433924 | 608 | if (ret) |
d9dd7308 | 609 | return ret; |
89d94756 | 610 | |
d2e428e4 DLM |
611 | if (sdkp->rc_basis == 0) { |
612 | /* The max_lba field is the capacity of this device */ | |
613 | max_lba = get_unaligned_be64(&buf[8]); | |
614 | if (sdkp->capacity != max_lba + 1) { | |
615 | if (sdkp->first_scan) | |
616 | sd_printk(KERN_WARNING, sdkp, | |
617 | "Changing capacity from %llu to max LBA+1 %llu\n", | |
618 | (unsigned long long)sdkp->capacity, | |
619 | (unsigned long long)max_lba + 1); | |
620 | sdkp->capacity = max_lba + 1; | |
621 | } | |
622 | } | |
623 | ||
dbfc5626 | 624 | /* Get the size of the first reported zone */ |
d9dd7308 DLM |
625 | rec = buf + 64; |
626 | zone_blocks = get_unaligned_be64(&rec[8]); | |
d9dd7308 | 627 | if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) { |
89d94756 HR |
628 | if (sdkp->first_scan) |
629 | sd_printk(KERN_NOTICE, sdkp, | |
630 | "Zone size too large\n"); | |
d9dd7308 | 631 | return -EFBIG; |
89d94756 HR |
632 | } |
633 | ||
d9dd7308 | 634 | *zblocks = zone_blocks; |
89d94756 | 635 | |
d9dd7308 | 636 | return 0; |
89d94756 HR |
637 | } |
638 | ||
a3d8a257 DLM |
639 | static void sd_zbc_print_zones(struct scsi_disk *sdkp) |
640 | { | |
641 | if (!sd_is_zoned(sdkp) || !sdkp->capacity) | |
642 | return; | |
643 | ||
644 | if (sdkp->capacity & (sdkp->zone_blocks - 1)) | |
645 | sd_printk(KERN_NOTICE, sdkp, | |
646 | "%u zones of %u logical blocks + 1 runt zone\n", | |
647 | sdkp->nr_zones - 1, | |
648 | sdkp->zone_blocks); | |
649 | else | |
650 | sd_printk(KERN_NOTICE, sdkp, | |
651 | "%u zones of %u logical blocks\n", | |
652 | sdkp->nr_zones, | |
653 | sdkp->zone_blocks); | |
654 | } | |
655 | ||
6c5dee18 DLM |
656 | static int sd_zbc_init_disk(struct scsi_disk *sdkp) |
657 | { | |
658 | sdkp->zones_wp_offset = NULL; | |
659 | spin_lock_init(&sdkp->zones_wp_offset_lock); | |
660 | sdkp->rev_wp_offset = NULL; | |
661 | mutex_init(&sdkp->rev_mutex); | |
662 | INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn); | |
663 | sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL); | |
664 | if (!sdkp->zone_wp_update_buf) | |
665 | return -ENOMEM; | |
666 | ||
667 | return 0; | |
668 | } | |
669 | ||
78e1663f | 670 | static void sd_zbc_clear_zone_info(struct scsi_disk *sdkp) |
6c5dee18 | 671 | { |
78e1663f DLM |
672 | /* Serialize against revalidate zones */ |
673 | mutex_lock(&sdkp->rev_mutex); | |
674 | ||
6c5dee18 DLM |
675 | kvfree(sdkp->zones_wp_offset); |
676 | sdkp->zones_wp_offset = NULL; | |
677 | kfree(sdkp->zone_wp_update_buf); | |
678 | sdkp->zone_wp_update_buf = NULL; | |
78e1663f DLM |
679 | |
680 | sdkp->nr_zones = 0; | |
681 | sdkp->rev_nr_zones = 0; | |
682 | sdkp->zone_blocks = 0; | |
683 | sdkp->rev_zone_blocks = 0; | |
684 | ||
685 | mutex_unlock(&sdkp->rev_mutex); | |
686 | } | |
687 | ||
/*
 * Release the zone information of @sdkp. Nothing is done for disks that are
 * not zoned.
 */
void sd_zbc_release_disk(struct scsi_disk *sdkp)
{
	if (!sd_is_zoned(sdkp))
		return;

	sd_zbc_clear_zone_info(sdkp);
}
693 | ||
5795eb44 JT |
694 | static void sd_zbc_revalidate_zones_cb(struct gendisk *disk) |
695 | { | |
696 | struct scsi_disk *sdkp = scsi_disk(disk); | |
697 | ||
698 | swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset); | |
699 | } | |
700 | ||
a3d8a257 | 701 | int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) |
5795eb44 JT |
702 | { |
703 | struct gendisk *disk = sdkp->disk; | |
a3d8a257 DLM |
704 | struct request_queue *q = disk->queue; |
705 | u32 zone_blocks = sdkp->rev_zone_blocks; | |
706 | unsigned int nr_zones = sdkp->rev_nr_zones; | |
707 | u32 max_append; | |
5795eb44 | 708 | int ret = 0; |
9acced3f | 709 | unsigned int flags; |
5795eb44 | 710 | |
6c5dee18 DLM |
711 | /* |
712 | * For all zoned disks, initialize zone append emulation data if not | |
713 | * already done. This is necessary also for host-aware disks used as | |
714 | * regular disks due to the presence of partitions as these partitions | |
715 | * may be deleted and the disk zoned model changed back from | |
716 | * BLK_ZONED_NONE to BLK_ZONED_HA. | |
717 | */ | |
718 | if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) { | |
719 | ret = sd_zbc_init_disk(sdkp); | |
720 | if (ret) | |
721 | return ret; | |
722 | } | |
723 | ||
27ba3e8f DLM |
724 | /* |
725 | * There is nothing to do for regular disks, including host-aware disks | |
726 | * that have partitions. | |
727 | */ | |
728 | if (!blk_queue_is_zoned(q)) | |
a3d8a257 DLM |
729 | return 0; |
730 | ||
5795eb44 JT |
731 | /* |
732 | * Make sure revalidate zones are serialized to ensure exclusive | |
733 | * updates of the scsi disk data. | |
734 | */ | |
735 | mutex_lock(&sdkp->rev_mutex); | |
736 | ||
5795eb44 JT |
737 | if (sdkp->zone_blocks == zone_blocks && |
738 | sdkp->nr_zones == nr_zones && | |
739 | disk->queue->nr_zones == nr_zones) | |
740 | goto unlock; | |
741 | ||
9acced3f | 742 | flags = memalloc_noio_save(); |
a3d8a257 DLM |
743 | sdkp->zone_blocks = zone_blocks; |
744 | sdkp->nr_zones = nr_zones; | |
9acced3f | 745 | sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL); |
5795eb44 JT |
746 | if (!sdkp->rev_wp_offset) { |
747 | ret = -ENOMEM; | |
9acced3f | 748 | memalloc_noio_restore(flags); |
5795eb44 JT |
749 | goto unlock; |
750 | } | |
751 | ||
752 | ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb); | |
753 | ||
9acced3f | 754 | memalloc_noio_restore(flags); |
5795eb44 JT |
755 | kvfree(sdkp->rev_wp_offset); |
756 | sdkp->rev_wp_offset = NULL; | |
757 | ||
a3d8a257 DLM |
758 | if (ret) { |
759 | sdkp->zone_blocks = 0; | |
760 | sdkp->nr_zones = 0; | |
761 | sdkp->capacity = 0; | |
762 | goto unlock; | |
763 | } | |
764 | ||
765 | max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks), | |
766 | q->limits.max_segments << (PAGE_SHIFT - 9)); | |
767 | max_append = min_t(u32, max_append, queue_max_hw_sectors(q)); | |
768 | ||
769 | blk_queue_max_zone_append_sectors(q, max_append); | |
770 | ||
771 | sd_zbc_print_zones(sdkp); | |
772 | ||
5795eb44 JT |
773 | unlock: |
774 | mutex_unlock(&sdkp->rev_mutex); | |
775 | ||
776 | return ret; | |
777 | } | |
778 | ||
e98f42bc | 779 | int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) |
89d94756 | 780 | { |
bf505456 | 781 | struct gendisk *disk = sdkp->disk; |
5795eb44 | 782 | struct request_queue *q = disk->queue; |
bf505456 | 783 | unsigned int nr_zones; |
0cdc5858 | 784 | u32 zone_blocks = 0; |
f7053240 | 785 | int ret; |
89d94756 HR |
786 | |
787 | if (!sd_is_zoned(sdkp)) | |
788 | /* | |
789 | * Device managed or normal SCSI disk, | |
790 | * no special handling required | |
791 | */ | |
792 | return 0; | |
793 | ||
78e1663f DLM |
794 | /* READ16/WRITE16 is mandatory for ZBC disks */ |
795 | sdkp->device->use_16_for_rw = 1; | |
796 | sdkp->device->use_10_for_rw = 0; | |
797 | ||
798 | if (!blk_queue_is_zoned(q)) { | |
799 | /* | |
800 | * This can happen for a host aware disk with partitions. | |
801 | * The block device zone information was already cleared | |
802 | * by blk_queue_set_zoned(). Only clear the scsi disk zone | |
803 | * information and exit early. | |
804 | */ | |
805 | sd_zbc_clear_zone_info(sdkp); | |
806 | return 0; | |
807 | } | |
808 | ||
7f9d35d2 DLM |
809 | /* Check zoned block device characteristics (unconstrained reads) */ |
810 | ret = sd_zbc_check_zoned_characteristics(sdkp, buf); | |
89d94756 HR |
811 | if (ret) |
812 | goto err; | |
813 | ||
dbfc5626 DLM |
814 | /* Check the device capacity reported by report zones */ |
815 | ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks); | |
5f832a39 | 816 | if (ret != 0) |
89d94756 HR |
817 | goto err; |
818 | ||
819 | /* The drive satisfies the kernel restrictions: set it up */ | |
5795eb44 JT |
820 | blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); |
821 | blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); | |
e15864f8 NC |
822 | if (sdkp->zones_max_open == U32_MAX) |
823 | blk_queue_max_open_zones(q, 0); | |
824 | else | |
825 | blk_queue_max_open_zones(q, sdkp->zones_max_open); | |
659bf827 | 826 | blk_queue_max_active_zones(q, 0); |
bf505456 DLM |
827 | nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks); |
828 | ||
a805a4fa DLM |
829 | /* |
830 | * Per ZBC and ZAC specifications, writes in sequential write required | |
831 | * zones of host-managed devices must be aligned to the device physical | |
832 | * block size. | |
833 | */ | |
834 | if (blk_queue_zoned_model(q) == BLK_ZONED_HM) | |
835 | blk_queue_zone_write_granularity(q, sdkp->physical_block_size); | |
836 | ||
a3d8a257 DLM |
837 | sdkp->rev_nr_zones = nr_zones; |
838 | sdkp->rev_zone_blocks = zone_blocks; | |
89d94756 HR |
839 | |
840 | return 0; | |
841 | ||
842 | err: | |
843 | sdkp->capacity = 0; | |
844 | ||
845 | return ret; | |
846 | } |