block: Expose queue nr_zones in sysfs
block/blk-zoned.c (linux-2.6-block.git)

/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

#include "blk.h"

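/*
 * Return the first sector of the zone containing @sector. Zone sizes of
 * zoned block devices are always a power of 2 number of sectors, so the
 * zone start can be computed with a simple mask.
 */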
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}

/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->seq_zones_wlock)
		return false;

	if (blk_rq_is_passthrough(rq))
		return false;

	switch (req_op(rq)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);

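/*
 * Illustrative sketch (not part of this file): an mq I/O scheduler can
 * use the helpers above to allow at most one in-flight write per
 * sequential zone. blk_req_zone_write_lock(), blk_req_zone_write_unlock()
 * and blk_req_can_dispatch_to_zone() are assumed to be the inline
 * wrappers around these functions provided by include/linux/blkdev.h.
 *
 *	// Dispatch path: skip a write whose target zone is already
 *	// write-locked, and lock the zone of the request dispatched.
 *	if (blk_req_needs_zone_write_lock(rq)) {
 *		if (!blk_req_can_dispatch_to_zone(rq))
 *			return NULL;
 *		blk_req_zone_write_lock(rq);
 *	}
 *
 *	// Completion path: release the zone for the next write.
 *	blk_req_zone_write_unlock(rq);
 */
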
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
					     sector_t nr_sectors)
{
	unsigned long zone_sectors = blk_queue_zone_sectors(q);

	return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}

/**
 * blkdev_nr_zones - Get number of zones
 * @bdev: Target block device
 *
 * Description:
 *    Return the total number of zones of a zoned block device.
 *    For a regular block device, the number of zones is always 0.
 */
unsigned int blkdev_nr_zones(struct block_device *bdev)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (!blk_queue_is_zoned(q))
		return 0;

	return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);

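/*
 * Worked example (illustrative): with 256 MiB zones and 512 B sectors,
 * zone_sectors is 524288 and ilog2(zone_sectors) is 19, so a 10000000
 * sector partition gives (10000000 + 524287) >> 19 = 20 zones: 19 full
 * zones plus a smaller trailing zone, which counts as a zone of its own.
 */
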
/*
 * Check that a zone report belongs to the partition.
 * If yes, fix its start sector and write pointer, copy it in the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
			       struct blk_zone *rep,
			       struct blk_zone *zone)
{
	sector_t offset = get_start_sect(bdev);

	if (rep->start < offset)
		return false;

	rep->start -= offset;
	if (rep->start + rep->len > bdev->bd_part->nr_sects)
		return false;

	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
		rep->wp = rep->start + rep->len;
	else
		rep->wp -= offset;
	memcpy(zone, rep, sizeof(struct blk_zone));

	return true;
}

/**
 * blkdev_report_zones - Get zones information
 * @bdev: Target block device
 * @sector: Sector from which to report zones
 * @zones: Array of zone structures where to return the zones information
 * @nr_zones: Number of zone structures in the zone array
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zones reported may be less than the number requested
 *    by @nr_zones. The number of zones actually reported is returned
 *    in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	struct page *page;
	unsigned int nr_rep;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	unsigned int i, n, nz;
	unsigned int ofst;
	void *addr;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (!nrz)
		return 0;

	if (sector > bdev->bd_part->nr_sects) {
		*nr_zones = 0;
		return 0;
	}

	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;

	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions.
	 * At the same time, return the zone information into the zone
	 * array.
	 */
	n = 0;
	nz = 0;
	nr_rep = 0;
	bio_for_each_segment_all(bv, bio, i) {

		if (!bv->bv_page)
			break;

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

	*nr_zones = nz;
out:
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);

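/*
 * Illustrative sketch (not part of this file): an in-kernel caller can
 * walk all zones of a partition with repeated calls, advancing the
 * start sector past the zones already returned. Error handling is
 * reduced to a minimum.
 *
 *	struct blk_zone zones[16];
 *	unsigned int i, nrz;
 *	sector_t sector = 0;
 *
 *	while (sector < bdev->bd_part->nr_sects) {
 *		nrz = ARRAY_SIZE(zones);
 *		if (blkdev_report_zones(bdev, sector, zones, &nrz,
 *					GFP_KERNEL) || !nrz)
 *			break;
 *		for (i = 0; i < nrz; i++)
 *			pr_info("zone at %llu, wp %llu\n",
 *				(unsigned long long)zones[i].start,
 *				(unsigned long long)zones[i].wp);
 *		sector = zones[nrz - 1].start + zones[nrz - 1].len;
 *	}
 */
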
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev: Target block device
 * @sector: Start sector of the first zone to reset
 * @nr_sectors: Number of sectors, at least the length of one zone
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle possible smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	blk_start_plug(&plug);
	while (sector < end_sector) {

		bio = blk_next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();

	}

	ret = submit_bio_wait(bio);
	bio_put(bio);

	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
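
/*
 * Illustrative sketch (not part of this file): resetting only the zone
 * that contains a given sector. blkdev_reset_zones() requires a zone
 * aligned start sector, so the sector is first masked down to the start
 * of its zone (zone sizes are a power of 2 number of sectors).
 *
 *	sector_t zone_sectors = blk_queue_zone_sectors(q);
 *	sector_t zone_start = sector & ~(zone_sectors - 1);
 *	int ret;
 *
 *	ret = blkdev_reset_zones(bdev, zone_start, zone_sectors,
 *				 GFP_KERNEL);
 */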

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);

	zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
			       GFP_KERNEL | __GFP_ZERO);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector,
				  zones, &rep.nr_zones,
				  GFP_KERNEL);
	if (ret)
		goto out;

	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
		ret = -EFAULT;
		goto out;
	}

	if (rep.nr_zones) {
		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
				 sizeof(struct blk_zone) * rep.nr_zones))
			ret = -EFAULT;
	}

out:
	kvfree(zones);

	return ret;
}

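/*
 * Illustrative sketch (not part of this file): from user space,
 * BLKREPORTZONE takes a struct blk_zone_report immediately followed by
 * the zone array, as defined in <linux/blkzoned.h>.
 *
 *	struct blk_zone_report *rep;
 *	unsigned int nrz = 128;
 *
 *	rep = malloc(sizeof(*rep) + nrz * sizeof(struct blk_zone));
 *	rep->sector = 0;
 *	rep->nr_zones = nrz;
 *	if (ioctl(fd, BLKREPORTZONE, rep) < 0)
 *		perror("BLKREPORTZONE");
 *	// rep->nr_zones now holds the number of zones reported
 */
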
/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
				  GFP_KERNEL);
}
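
/*
 * Illustrative sketch (not part of this file): from user space,
 * BLKRESETZONE takes a struct blk_zone_range from <linux/blkzoned.h>,
 * and the device must be open for writing.
 *
 *	struct blk_zone_range zrange = {
 *		.sector = zone_start,
 *		.nr_sectors = zone_sectors,
 *	};
 *
 *	if (ioctl(fd, BLKRESETZONE, &zrange) < 0)
 *		perror("BLKRESETZONE");
 */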