block: add ioctl to read the disk sequence number
[linux-2.6-block.git] / block / genhd.c
CommitLineData
3dcf60bc 1// SPDX-License-Identifier: GPL-2.0
1da177e4
LT
2/*
3 * gendisk handling
7b51e703
CH
4 *
5 * Portions Copyright (C) 2020 Christoph Hellwig
1da177e4
LT
6 */
7
1da177e4 8#include <linux/module.h>
3ad5cee5 9#include <linux/ctype.h>
1da177e4
LT
10#include <linux/fs.h>
11#include <linux/genhd.h>
b446b60e 12#include <linux/kdev_t.h>
1da177e4
LT
13#include <linux/kernel.h>
14#include <linux/blkdev.h>
66114cad 15#include <linux/backing-dev.h>
1da177e4
LT
16#include <linux/init.h>
17#include <linux/spinlock.h>
f500975a 18#include <linux/proc_fs.h>
1da177e4
LT
19#include <linux/seq_file.h>
20#include <linux/slab.h>
21#include <linux/kmod.h>
58383af6 22#include <linux/mutex.h>
bcce3de1 23#include <linux/idr.h>
77ea887e 24#include <linux/log2.h>
25e823c8 25#include <linux/pm_runtime.h>
99e6608c 26#include <linux/badblocks.h>
1da177e4 27
ff88972c
AB
28#include "blk.h"
29
31eb6186 30static struct kobject *block_depr;
1da177e4 31
cf179948
MC
32/*
33 * Unique, monotonically increasing sequential number associated with block
34 * device instances (i.e. incremented each time a device is attached).
35 * Associating uevents with block devices in userspace is difficult and racy:
36 * the uevent netlink socket is lossy, and on slow and overloaded systems has
37 * a very high latency.
38 * Block devices do not have exclusive owners in userspace, any process can set
39 * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
40 * can be reused again and again).
41 * A userspace process setting up a block device and watching for its events
42 * cannot thus reliably tell whether an event relates to the device it just set
43 * up or another earlier instance with the same name.
44 * This sequential number allows userspace processes to solve this problem, and
45 * uniquely associate a uevent with the lifetime of a device.
46 */
47static atomic64_t diskseq;
48
bcce3de1 49/* for extended dynamic devt allocation, currently only one major is used */
ce23bba8 50#define NR_EXT_DEVT (1 << MINORBITS)
22ae8ce8 51static DEFINE_IDA(ext_devt_ida);
bcce3de1 52
a782483c
CH
53void set_capacity(struct gendisk *disk, sector_t sectors)
54{
cb8432d6 55 struct block_device *bdev = disk->part0;
a782483c 56
0f472277 57 spin_lock(&bdev->bd_size_lock);
a782483c 58 i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
0f472277 59 spin_unlock(&bdev->bd_size_lock);
a782483c
CH
60}
61EXPORT_SYMBOL(set_capacity);
62
e598a72f 63/*
449f4ec9
CH
64 * Set disk capacity and notify if the size is not currently zero and will not
65 * be set to zero. Returns true if a uevent was sent, otherwise false.
e598a72f 66 */
449f4ec9 67bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
e598a72f
BS
68{
69 sector_t capacity = get_capacity(disk);
a782483c 70 char *envp[] = { "RESIZE=1", NULL };
e598a72f
BS
71
72 set_capacity(disk, size);
e598a72f 73
a782483c
CH
74 /*
75 * Only print a message and send a uevent if the gendisk is user visible
76 * and alive. This avoids spamming the log and udev when setting the
77 * initial capacity during probing.
78 */
79 if (size == capacity ||
80 (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
81 return false;
e598a72f 82
a782483c 83 pr_info("%s: detected capacity change from %lld to %lld\n",
452c0bf8 84 disk->disk_name, capacity, size);
7e890c37 85
a782483c
CH
86 /*
87 * Historically we did not send a uevent for changes to/from an empty
88 * device.
89 */
90 if (!capacity || !size)
91 return false;
92 kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
93 return true;
e598a72f 94}
449f4ec9 95EXPORT_SYMBOL_GPL(set_capacity_and_notify);
e598a72f 96
5cbd28e3 97/*
abd2864a
CH
98 * Format the device name of the indicated block device into the supplied buffer
99 * and return a pointer to that same buffer for convenience.
100 *
101 * Note: do not use this in new code, use the %pg specifier to sprintf and
102 * printk instead.
5cbd28e3 103 */
abd2864a 104const char *bdevname(struct block_device *bdev, char *buf)
5cbd28e3 105{
abd2864a
CH
106 struct gendisk *hd = bdev->bd_disk;
107 int partno = bdev->bd_partno;
108
5cbd28e3
CH
109 if (!partno)
110 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
111 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
112 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
113 else
114 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
115
116 return buf;
117}
5cbd28e3 118EXPORT_SYMBOL(bdevname);
e598a72f 119
0d02129e
CH
120static void part_stat_read_all(struct block_device *part,
121 struct disk_stats *stat)
ea18e0f0
KK
122{
123 int cpu;
124
125 memset(stat, 0, sizeof(struct disk_stats));
126 for_each_possible_cpu(cpu) {
0d02129e 127 struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
ea18e0f0
KK
128 int group;
129
130 for (group = 0; group < NR_STAT_GROUPS; group++) {
131 stat->nsecs[group] += ptr->nsecs[group];
132 stat->sectors[group] += ptr->sectors[group];
133 stat->ios[group] += ptr->ios[group];
134 stat->merges[group] += ptr->merges[group];
135 }
136
137 stat->io_ticks += ptr->io_ticks;
ea18e0f0
KK
138 }
139}
ea18e0f0 140
8446fe92 141static unsigned int part_in_flight(struct block_device *part)
f299b7c7 142{
b2f609e1 143 unsigned int inflight = 0;
1226b8dd 144 int cpu;
f299b7c7 145
1226b8dd 146 for_each_possible_cpu(cpu) {
e016b782
MP
147 inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
148 part_stat_local_read_cpu(part, in_flight[1], cpu);
1226b8dd 149 }
e016b782
MP
150 if ((int)inflight < 0)
151 inflight = 0;
1226b8dd 152
e016b782 153 return inflight;
f299b7c7
JA
154}
155
8446fe92
CH
156static void part_in_flight_rw(struct block_device *part,
157 unsigned int inflight[2])
bf0ddaba 158{
1226b8dd
MP
159 int cpu;
160
1226b8dd
MP
161 inflight[0] = 0;
162 inflight[1] = 0;
163 for_each_possible_cpu(cpu) {
164 inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
165 inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
166 }
167 if ((int)inflight[0] < 0)
168 inflight[0] = 0;
169 if ((int)inflight[1] < 0)
170 inflight[1] = 0;
bf0ddaba
OS
171}
172
1da177e4
LT
173/*
174 * Can be deleted altogether. Later.
175 *
176 */
133d55cd 177#define BLKDEV_MAJOR_HASH_SIZE 255
1da177e4
LT
178static struct blk_major_name {
179 struct blk_major_name *next;
180 int major;
181 char name[16];
a160c615 182 void (*probe)(dev_t devt);
68eef3b4 183} *major_names[BLKDEV_MAJOR_HASH_SIZE];
e49fbbbf 184static DEFINE_MUTEX(major_names_lock);
1da177e4
LT
185
186/* index in the above - for now: assume no multimajor ranges */
e61eb2e9 187static inline int major_to_index(unsigned major)
1da177e4 188{
68eef3b4 189 return major % BLKDEV_MAJOR_HASH_SIZE;
7170be5f
NH
190}
191
#ifdef CONFIG_PROC_FS
/*
 * Print "major name" to @seqf for every registered entry whose major number
 * equals @offset.  The hash chain is walked under major_names_lock.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	mutex_lock(&major_names_lock);
	entry = major_names[major_to_index(offset)];
	while (entry) {
		if (entry->major == offset)
			seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&major_names_lock);
}
#endif /* CONFIG_PROC_FS */
1da177e4 204
9e8c0bcc 205/**
e2b6b301 206 * __register_blkdev - register a new block device
9e8c0bcc 207 *
f33ff110
SB
208 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
209 * @major = 0, try to allocate any unused major number.
9e8c0bcc 210 * @name: the name of the new block device as a zero terminated string
e2b6b301 211 * @probe: callback that is called on access to any minor number of @major
9e8c0bcc
MN
212 *
213 * The @name must be unique within the system.
214 *
0e056eb5 215 * The return value depends on the @major input parameter:
216 *
f33ff110
SB
217 * - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
218 * then the function returns zero on success, or a negative error code
0e056eb5 219 * - if any unused major number was requested with @major = 0 parameter
9e8c0bcc 220 * then the return value is the allocated major number in range
f33ff110
SB
221 * [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
222 *
223 * See Documentation/admin-guide/devices.txt for the list of allocated
224 * major numbers.
e2b6b301
CH
225 *
226 * Use register_blkdev instead for any new code.
9e8c0bcc 227 */
a160c615
CH
228int __register_blkdev(unsigned int major, const char *name,
229 void (*probe)(dev_t devt))
1da177e4
LT
230{
231 struct blk_major_name **n, *p;
232 int index, ret = 0;
233
e49fbbbf 234 mutex_lock(&major_names_lock);
1da177e4
LT
235
236 /* temporary */
237 if (major == 0) {
238 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
239 if (major_names[index] == NULL)
240 break;
241 }
242
243 if (index == 0) {
dfc76d11
KP
244 printk("%s: failed to get major for %s\n",
245 __func__, name);
1da177e4
LT
246 ret = -EBUSY;
247 goto out;
248 }
249 major = index;
250 ret = major;
251 }
252
133d55cd 253 if (major >= BLKDEV_MAJOR_MAX) {
dfc76d11
KP
254 pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
255 __func__, major, BLKDEV_MAJOR_MAX-1, name);
133d55cd
LG
256
257 ret = -EINVAL;
258 goto out;
259 }
260
1da177e4
LT
261 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
262 if (p == NULL) {
263 ret = -ENOMEM;
264 goto out;
265 }
266
267 p->major = major;
a160c615 268 p->probe = probe;
1da177e4
LT
269 strlcpy(p->name, name, sizeof(p->name));
270 p->next = NULL;
271 index = major_to_index(major);
272
273 for (n = &major_names[index]; *n; n = &(*n)->next) {
274 if ((*n)->major == major)
275 break;
276 }
277 if (!*n)
278 *n = p;
279 else
280 ret = -EBUSY;
281
282 if (ret < 0) {
f33ff110 283 printk("register_blkdev: cannot get major %u for %s\n",
1da177e4
LT
284 major, name);
285 kfree(p);
286 }
287out:
e49fbbbf 288 mutex_unlock(&major_names_lock);
1da177e4
LT
289 return ret;
290}
a160c615 291EXPORT_SYMBOL(__register_blkdev);
1da177e4 292
f4480240 293void unregister_blkdev(unsigned int major, const char *name)
1da177e4
LT
294{
295 struct blk_major_name **n;
296 struct blk_major_name *p = NULL;
297 int index = major_to_index(major);
1da177e4 298
e49fbbbf 299 mutex_lock(&major_names_lock);
1da177e4
LT
300 for (n = &major_names[index]; *n; n = &(*n)->next)
301 if ((*n)->major == major)
302 break;
294462a5
AM
303 if (!*n || strcmp((*n)->name, name)) {
304 WARN_ON(1);
294462a5 305 } else {
1da177e4
LT
306 p = *n;
307 *n = p->next;
308 }
e49fbbbf 309 mutex_unlock(&major_names_lock);
1da177e4 310 kfree(p);
1da177e4
LT
311}
312
313EXPORT_SYMBOL(unregister_blkdev);
314
870d6656
TH
315/**
316 * blk_mangle_minor - scatter minor numbers apart
317 * @minor: minor number to mangle
318 *
319 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
320 * is enabled. Mangling twice gives the original value.
321 *
322 * RETURNS:
323 * Mangled value.
324 *
325 * CONTEXT:
326 * Don't care.
327 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int lo_pos;

	/* swap bit lo_pos with its mirror bit hi_pos, working inwards */
	for (lo_pos = 0; lo_pos < MINORBITS / 2; lo_pos++) {
		int hi_pos = MINORBITS - 1 - lo_pos;
		int shift = hi_pos - lo_pos;
		int lo_bit = minor & (1 << lo_pos);
		int hi_bit = minor & (1 << hi_pos);

		minor ^= lo_bit | hi_bit;	/* clear both bits */
		minor |= (lo_bit << shift) | (hi_bit >> shift);
	}
#endif
	return minor;
}
346
7c3f828b 347int blk_alloc_ext_minor(void)
bcce3de1 348{
bab998d6 349 int idx;
bcce3de1 350
22ae8ce8 351 idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
7c3f828b
CH
352 if (idx < 0) {
353 if (idx == -ENOSPC)
354 return -EBUSY;
355 return idx;
356 }
357 return blk_mangle_minor(idx);
bcce3de1
TH
358}
359
7c3f828b 360void blk_free_ext_minor(unsigned int minor)
bcce3de1 361{
7c3f828b 362 ida_free(&ext_devt_ida, blk_mangle_minor(minor));
6fcc44d1
YY
363}
364
1f014290
TH
365static char *bdevt_str(dev_t devt, char *buf)
366{
367 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
368 char tbuf[BDEVT_SIZE];
369 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
370 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
371 } else
372 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
373
374 return buf;
375}
376
bc359d03
CH
377void disk_uevent(struct gendisk *disk, enum kobject_action action)
378{
bc359d03 379 struct block_device *part;
3212135a 380 unsigned long idx;
bc359d03 381
3212135a
CH
382 rcu_read_lock();
383 xa_for_each(&disk->part_tbl, idx, part) {
384 if (bdev_is_partition(part) && !bdev_nr_sectors(part))
385 continue;
498dcc13 386 if (!kobject_get_unless_zero(&part->bd_device.kobj))
3212135a
CH
387 continue;
388
389 rcu_read_unlock();
bc359d03 390 kobject_uevent(bdev_kobj(part), action);
498dcc13 391 put_device(&part->bd_device);
3212135a
CH
392 rcu_read_lock();
393 }
394 rcu_read_unlock();
bc359d03
CH
395}
396EXPORT_SYMBOL_GPL(disk_uevent);
397
9301fe73
CH
398static void disk_scan_partitions(struct gendisk *disk)
399{
400 struct block_device *bdev;
401
402 if (!get_capacity(disk) || !disk_part_scan_enabled(disk))
403 return;
404
405 set_bit(GD_NEED_PART_SCAN, &disk->state);
406 bdev = blkdev_get_by_dev(disk_devt(disk), FMODE_READ, NULL);
407 if (!IS_ERR(bdev))
408 blkdev_put(bdev, FMODE_READ);
409}
410
fef912bf
HR
411static void register_disk(struct device *parent, struct gendisk *disk,
412 const struct attribute_group **groups)
d2bf1b67
TH
413{
414 struct device *ddev = disk_to_dev(disk);
d2bf1b67
TH
415 int err;
416
e63a46be 417 ddev->parent = parent;
d2bf1b67 418
ffc8b308 419 dev_set_name(ddev, "%s", disk->disk_name);
d2bf1b67
TH
420
421 /* delay uevents, until we scanned partition table */
422 dev_set_uevent_suppress(ddev, 1);
423
fef912bf
HR
424 if (groups) {
425 WARN_ON(ddev->groups);
426 ddev->groups = groups;
427 }
d2bf1b67
TH
428 if (device_add(ddev))
429 return;
430 if (!sysfs_deprecated) {
431 err = sysfs_create_link(block_depr, &ddev->kobj,
432 kobject_name(&ddev->kobj));
433 if (err) {
434 device_del(ddev);
435 return;
436 }
437 }
25e823c8
ML
438
439 /*
440 * avoid probable deadlock caused by allocating memory with
441 * GFP_KERNEL in runtime_resume callback of its all ancestor
442 * devices
443 */
444 pm_runtime_set_memalloc_noio(ddev, true);
445
cb8432d6
CH
446 disk->part0->bd_holder_dir =
447 kobject_create_and_add("holders", &ddev->kobj);
d2bf1b67
TH
448 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
449
9ec49144 450 if (disk->flags & GENHD_FL_HIDDEN)
8ddcd653 451 return;
8ddcd653 452
9301fe73 453 disk_scan_partitions(disk);
d2bf1b67 454
bc359d03 455 /* announce the disk and partitions after all partitions are created */
d2bf1b67 456 dev_set_uevent_suppress(ddev, 0);
bc359d03 457 disk_uevent(disk, KOBJ_ADD);
8ddcd653 458
4d7c1d3f 459 if (disk->queue->backing_dev_info->dev) {
460 err = sysfs_create_link(&ddev->kobj,
461 &disk->queue->backing_dev_info->dev->kobj,
462 "bdi");
463 WARN_ON(err);
464 }
d2bf1b67
TH
465}
466
1da177e4 467/**
fa70d2e2 468 * __device_add_disk - add disk information to kernel list
e63a46be 469 * @parent: parent device for the disk
1da177e4 470 * @disk: per-device partitioning information
fef912bf 471 * @groups: Additional per-device sysfs groups
fa70d2e2 472 * @register_queue: register the queue if set to true
1da177e4
LT
473 *
474 * This function registers the partitioning information in @disk
475 * with the kernel.
3e1a7ff8
TH
476 *
477 * FIXME: error handling
1da177e4 478 */
fa70d2e2 479static void __device_add_disk(struct device *parent, struct gendisk *disk,
fef912bf 480 const struct attribute_group **groups,
fa70d2e2 481 bool register_queue)
1da177e4 482{
7c3f828b 483 int ret;
cf0ca9fe 484
737eb78e
DLM
485 /*
486 * The disk queue should now be all set with enough information about
487 * the device for the elevator code to pick an adequate default
488 * elevator if one is needed, that is, for devices requesting queue
489 * registration.
490 */
491 if (register_queue)
492 elevator_init_mq(disk->queue);
493
7c3f828b
CH
494 /*
495 * If the driver provides an explicit major number it also must provide
496 * the number of minors numbers supported, and those will be used to
497 * setup the gendisk.
498 * Otherwise just allocate the device numbers for both the whole device
499 * and all partitions from the extended dev_t space.
3e1a7ff8 500 */
7c3f828b
CH
501 if (disk->major) {
502 WARN_ON(!disk->minors);
2e3c73fa
CH
503
/* Too many minors is not fatal: log it and clamp to DISK_MAX_PARTS. */
504 if (disk->minors > DISK_MAX_PARTS) {
505 pr_err("block: can't allocate more than %d partitions\n",
506 DISK_MAX_PARTS);
507 disk->minors = DISK_MAX_PARTS;
508 }
7c3f828b
CH
509 } else {
510 WARN_ON(disk->minors);
3e1a7ff8 511
/*
 * No major supplied: take a minor from the extended dev_t space.  On
 * failure we warn and return without registering anything.
 */
7c3f828b
CH
512 ret = blk_alloc_ext_minor();
513 if (ret < 0) {
514 WARN_ON(1);
515 return;
516 }
517 disk->major = BLOCK_EXT_MAJOR;
518 disk->first_minor = MINOR(ret);
0d1feb72 519 disk->flags |= GENHD_FL_EXT_DEVT;
3e1a7ff8 520 }
7c3f828b
CH
521
522 disk->flags |= GENHD_FL_UP;
3e1a7ff8 523
9f53d2fe
SG
524 disk_alloc_events(disk);
525
8ddcd653
CH
526 if (disk->flags & GENHD_FL_HIDDEN) {
527 /*
528 * Don't let hidden disks show up in /proc/partitions,
529 * and don't bother scanning for partitions either.
530 */
531 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
532 disk->flags |= GENHD_FL_NO_PART_SCAN;
533 } else {
3c5d202b
CH
534 struct backing_dev_info *bdi = disk->queue->backing_dev_info;
535 struct device *dev = disk_to_dev(disk);
3a92168b 536
8ddcd653 537 /* Register BDI before referencing it from bdev */
7c3f828b
CH
538 dev->devt = MKDEV(disk->major, disk->first_minor);
539 ret = bdi_register(bdi, "%u:%u",
540 disk->major, disk->first_minor);
3a92168b 541 WARN_ON(ret);
3c5d202b 542 bdi_set_owner(bdi, dev);
7c3f828b 543 bdev_add(disk->part0, dev->devt);
8ddcd653 544 }
/*
 * register_disk() adds the sysfs device; for disks that are not hidden it
 * also scans partitions and sends the KOBJ_ADD uevents.
 */
fef912bf 545 register_disk(parent, disk, groups);
fa70d2e2
MS
546 if (register_queue)
547 blk_register_queue(disk);
cf0ca9fe 548
523e1d39
TH
549 /*
550 * Take an extra ref on queue which will be put on disk_release()
551 * so that it sticks around as long as @disk is there.
552 */
958229a7
CH
553 if (blk_get_queue(disk->queue))
554 set_bit(GD_QUEUE_REF, &disk->state);
555 else
556 WARN_ON_ONCE(1);
523e1d39 557
77ea887e 558 disk_add_events(disk);
25520d55 559 blk_integrity_add(disk);
1da177e4 560}
fa70d2e2 561
fef912bf
HR
562void device_add_disk(struct device *parent, struct gendisk *disk,
563 const struct attribute_group **groups)
564
fa70d2e2 565{
fef912bf 566 __device_add_disk(parent, disk, groups, true);
fa70d2e2 567}
e63a46be 568EXPORT_SYMBOL(device_add_disk);
1da177e4 569
fa70d2e2
MS
570void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
571{
fef912bf 572 __device_add_disk(parent, disk, NULL, false);
fa70d2e2
MS
573}
574EXPORT_SYMBOL(device_add_disk_no_queue_reg);
575
b5bd357c
LC
576/**
577 * del_gendisk - remove the gendisk
578 * @disk: the struct gendisk to remove
579 *
580 * Removes the gendisk and all its associated resources. This deletes the
581 * partitions associated with the gendisk, and unregisters the associated
582 * request_queue.
583 *
584 * This is the counter to the respective __device_add_disk() call.
585 *
586 * The final removal of the struct gendisk happens when its refcount reaches 0
587 * with put_disk(), which should be called after del_gendisk(), if
588 * __device_add_disk() was used.
e8c7d14a
LC
589 *
590 * Drivers exist which depend on the release of the gendisk to be synchronous,
591 * it should not be deferred.
592 *
593 * Context: can sleep
b5bd357c 594 */
d2bf1b67 595void del_gendisk(struct gendisk *disk)
1da177e4 596{
e8c7d14a
LC
597 might_sleep();
598
/* A disk without a queue is unexpected here: warn once and do nothing. */
6b3ba976
CH
599 if (WARN_ON_ONCE(!disk->queue))
600 return;
601
25520d55 602 blk_integrity_del(disk);
77ea887e
TH
603 disk_del_events(disk);
604
/*
 * Under open_mutex: unhash the whole-disk inode, clear GENHD_FL_UP and
 * drop all partitions.
 */
a8698707 605 mutex_lock(&disk->open_mutex);
d7a66574 606 remove_inode_hash(disk->part0->bd_inode);
6c60ff04 607 disk->flags &= ~GENHD_FL_UP;
d3c4a43d 608 blk_drop_partitions(disk);
a8698707 609 mutex_unlock(&disk->open_mutex);
c76f48eb 610
/* Sync and invalidate the whole-disk device, then set its capacity to 0. */
45611837
CH
611 fsync_bdev(disk->part0);
612 __invalidate_device(disk->part0, true);
613
d2bf1b67 614 set_capacity(disk, 0);
d2bf1b67 615
6b3ba976 616 if (!(disk->flags & GENHD_FL_HIDDEN)) {
8ddcd653 617 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
6b3ba976 618
90f16fdd
JK
619 /*
620 * Unregister bdi before releasing device numbers (as they can
621 * get reused and we'd get clashes in sysfs).
622 */
6b3ba976 623 bdi_unregister(disk->queue->backing_dev_info);
90f16fdd 624 }
d2bf1b67 625
6b3ba976 626 blk_unregister_queue(disk);
d2bf1b67 627
/* Drop the holder and slave directory kobjects. */
cb8432d6 628 kobject_put(disk->part0->bd_holder_dir);
d2bf1b67 629 kobject_put(disk->slave_dir);
d2bf1b67 630
/* Reset the whole-disk statistics and timestamp. */
8446fe92 631 part_stat_set_all(disk->part0, 0);
cb8432d6 632 disk->part0->bd_stamp = 0;
d2bf1b67
TH
633 if (!sysfs_deprecated)
634 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
25e823c8 635 pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
d2bf1b67 636 device_del(disk_to_dev(disk));
1da177e4 637}
d2bf1b67 638EXPORT_SYMBOL(del_gendisk);
1da177e4 639
99e6608c
VV
640/* sysfs access to bad-blocks list. */
641static ssize_t disk_badblocks_show(struct device *dev,
642 struct device_attribute *attr,
643 char *page)
644{
645 struct gendisk *disk = dev_to_disk(dev);
646
647 if (!disk->bb)
648 return sprintf(page, "\n");
649
650 return badblocks_show(disk->bb, page, 0);
651}
652
653static ssize_t disk_badblocks_store(struct device *dev,
654 struct device_attribute *attr,
655 const char *page, size_t len)
656{
657 struct gendisk *disk = dev_to_disk(dev);
658
659 if (!disk->bb)
660 return -ENXIO;
661
662 return badblocks_store(disk->bb, page, len, 0);
663}
664
22ae8ce8 665void blk_request_module(dev_t devt)
bd8eff3b 666{
a160c615
CH
667 unsigned int major = MAJOR(devt);
668 struct blk_major_name **n;
669
670 mutex_lock(&major_names_lock);
671 for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
672 if ((*n)->major == major && (*n)->probe) {
673 (*n)->probe(devt);
674 mutex_unlock(&major_names_lock);
675 return;
676 }
677 }
678 mutex_unlock(&major_names_lock);
679
bd8eff3b
CH
680 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
681 /* Make old-style 2.4 aliases work */
682 request_module("block-major-%d", MAJOR(devt));
683}
684
5c6f35c5
GKH
685/*
686 * print a full list of all partitions - intended for places where the root
687 * filesystem can't be mounted and thus to give the victim some idea of what
688 * went wrong
689 */
690void __init printk_all_partitions(void)
691{
def4e38d
TH
692 struct class_dev_iter iter;
693 struct device *dev;
694
695 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
696 while ((dev = class_dev_iter_next(&iter))) {
697 struct gendisk *disk = dev_to_disk(dev);
ad1eaa53 698 struct block_device *part;
1f014290 699 char devt_buf[BDEVT_SIZE];
e559f58d 700 unsigned long idx;
def4e38d
TH
701
702 /*
703 * Don't show empty devices or things that have been
25985edc 704 * suppressed
def4e38d
TH
705 */
706 if (get_capacity(disk) == 0 ||
707 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
708 continue;
709
710 /*
e559f58d
CH
711 * Note, unlike /proc/partitions, I am showing the numbers in
712 * hex - the same format as the root= option takes.
def4e38d 713 */
e559f58d
CH
714 rcu_read_lock();
715 xa_for_each(&disk->part_tbl, idx, part) {
716 if (!bdev_nr_sectors(part))
717 continue;
a9e7bc3d 718 printk("%s%s %10llu %pg %s",
e559f58d 719 bdev_is_partition(part) ? " " : "",
ad1eaa53 720 bdevt_str(part->bd_dev, devt_buf),
a9e7bc3d 721 bdev_nr_sectors(part) >> 1, part,
ad1eaa53
CH
722 part->bd_meta_info ?
723 part->bd_meta_info->uuid : "");
e559f58d 724 if (bdev_is_partition(part))
074a7aca 725 printk("\n");
e559f58d
CH
726 else if (dev->parent && dev->parent->driver)
727 printk(" driver: %s\n",
728 dev->parent->driver->name);
729 else
730 printk(" (driver?)\n");
074a7aca 731 }
e559f58d 732 rcu_read_unlock();
def4e38d
TH
733 }
734 class_dev_iter_exit(&iter);
dd2a345f
DG
735}
736
1da177e4
LT
737#ifdef CONFIG_PROC_FS
738/* iterator */
def4e38d 739static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
68c4d4a7 740{
def4e38d
TH
741 loff_t skip = *pos;
742 struct class_dev_iter *iter;
743 struct device *dev;
68c4d4a7 744
aeb3d3a8 745 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
def4e38d
TH
746 if (!iter)
747 return ERR_PTR(-ENOMEM);
748
749 seqf->private = iter;
750 class_dev_iter_init(iter, &block_class, NULL, &disk_type);
751 do {
752 dev = class_dev_iter_next(iter);
753 if (!dev)
754 return NULL;
755 } while (skip--);
756
757 return dev_to_disk(dev);
68c4d4a7
GKH
758}
759
def4e38d 760static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
1da177e4 761{
edfaa7c3 762 struct device *dev;
1da177e4 763
def4e38d
TH
764 (*pos)++;
765 dev = class_dev_iter_next(seqf->private);
2ac3cee5 766 if (dev)
68c4d4a7 767 return dev_to_disk(dev);
2ac3cee5 768
1da177e4
LT
769 return NULL;
770}
771
def4e38d 772static void disk_seqf_stop(struct seq_file *seqf, void *v)
27f30251 773{
def4e38d 774 struct class_dev_iter *iter = seqf->private;
27f30251 775
def4e38d
TH
776 /* stop is called even after start failed :-( */
777 if (iter) {
778 class_dev_iter_exit(iter);
779 kfree(iter);
77da1605 780 seqf->private = NULL;
5c0ef6d0 781 }
1da177e4
LT
782}
783
def4e38d 784static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
1da177e4 785{
06768067 786 void *p;
def4e38d
TH
787
788 p = disk_seqf_start(seqf, pos);
b9f985b6 789 if (!IS_ERR_OR_NULL(p) && !*pos)
def4e38d
TH
790 seq_puts(seqf, "major minor #blocks name\n\n");
791 return p;
1da177e4
LT
792}
793
cf771cb5 794static int show_partition(struct seq_file *seqf, void *v)
1da177e4
LT
795{
796 struct gendisk *sgp = v;
ad1eaa53 797 struct block_device *part;
ecc75a98 798 unsigned long idx;
1da177e4 799
1da177e4 800 /* Don't show non-partitionable removeable devices or empty devices */
d27769ec 801 if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
f331c029 802 (sgp->flags & GENHD_FL_REMOVABLE)))
1da177e4
LT
803 return 0;
804 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
805 return 0;
806
ecc75a98
CH
807 rcu_read_lock();
808 xa_for_each(&sgp->part_tbl, idx, part) {
809 if (!bdev_nr_sectors(part))
810 continue;
a291bb43 811 seq_printf(seqf, "%4d %7d %10llu %pg\n",
ad1eaa53 812 MAJOR(part->bd_dev), MINOR(part->bd_dev),
a291bb43 813 bdev_nr_sectors(part) >> 1, part);
ecc75a98
CH
814 }
815 rcu_read_unlock();
1da177e4
LT
816 return 0;
817}
818
f500975a 819static const struct seq_operations partitions_op = {
def4e38d
TH
820 .start = show_partition_start,
821 .next = disk_seqf_next,
822 .stop = disk_seqf_stop,
edfaa7c3 823 .show = show_partition
1da177e4
LT
824};
825#endif
826
1da177e4
LT
827static int __init genhd_device_init(void)
828{
e105b8bf
DW
829 int error;
830
831 block_class.dev_kobj = sysfs_dev_block_kobj;
832 error = class_register(&block_class);
ee27a558
RM
833 if (unlikely(error))
834 return error;
1da177e4 835 blk_dev_init();
edfaa7c3 836
561ec68e
ZY
837 register_blkdev(BLOCK_EXT_MAJOR, "blkext");
838
edfaa7c3 839 /* create top-level block dir */
e52eec13
AK
840 if (!sysfs_deprecated)
841 block_depr = kobject_create_and_add("block", NULL);
830d3cfb 842 return 0;
1da177e4
LT
843}
844
845subsys_initcall(genhd_device_init);
846
edfaa7c3
KS
847static ssize_t disk_range_show(struct device *dev,
848 struct device_attribute *attr, char *buf)
1da177e4 849{
edfaa7c3 850 struct gendisk *disk = dev_to_disk(dev);
1da177e4 851
edfaa7c3 852 return sprintf(buf, "%d\n", disk->minors);
1da177e4
LT
853}
854
1f014290
TH
855static ssize_t disk_ext_range_show(struct device *dev,
856 struct device_attribute *attr, char *buf)
857{
858 struct gendisk *disk = dev_to_disk(dev);
859
b5d0b9df 860 return sprintf(buf, "%d\n", disk_max_parts(disk));
1f014290
TH
861}
862
edfaa7c3
KS
863static ssize_t disk_removable_show(struct device *dev,
864 struct device_attribute *attr, char *buf)
a7fd6706 865{
edfaa7c3 866 struct gendisk *disk = dev_to_disk(dev);
a7fd6706 867
edfaa7c3
KS
868 return sprintf(buf, "%d\n",
869 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
a7fd6706
KS
870}
871
8ddcd653
CH
872static ssize_t disk_hidden_show(struct device *dev,
873 struct device_attribute *attr, char *buf)
874{
875 struct gendisk *disk = dev_to_disk(dev);
876
877 return sprintf(buf, "%d\n",
878 (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
879}
880
1c9ce527
KS
881static ssize_t disk_ro_show(struct device *dev,
882 struct device_attribute *attr, char *buf)
883{
884 struct gendisk *disk = dev_to_disk(dev);
885
b7db9956 886 return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
1c9ce527
KS
887}
888
3ad5cee5
CH
889ssize_t part_size_show(struct device *dev,
890 struct device_attribute *attr, char *buf)
891{
0d02129e 892 return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
3ad5cee5
CH
893}
894
895ssize_t part_stat_show(struct device *dev,
896 struct device_attribute *attr, char *buf)
897{
0d02129e
CH
898 struct block_device *bdev = dev_to_bdev(dev);
899 struct request_queue *q = bdev->bd_disk->queue;
ea18e0f0 900 struct disk_stats stat;
3ad5cee5
CH
901 unsigned int inflight;
902
0d02129e 903 part_stat_read_all(bdev, &stat);
b2f609e1 904 if (queue_is_mq(q))
0d02129e 905 inflight = blk_mq_in_flight(q, bdev);
b2f609e1 906 else
0d02129e 907 inflight = part_in_flight(bdev);
ea18e0f0 908
3ad5cee5
CH
909 return sprintf(buf,
910 "%8lu %8lu %8llu %8u "
911 "%8lu %8lu %8llu %8u "
912 "%8u %8u %8u "
913 "%8lu %8lu %8llu %8u "
914 "%8lu %8u"
915 "\n",
ea18e0f0
KK
916 stat.ios[STAT_READ],
917 stat.merges[STAT_READ],
918 (unsigned long long)stat.sectors[STAT_READ],
919 (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
920 stat.ios[STAT_WRITE],
921 stat.merges[STAT_WRITE],
922 (unsigned long long)stat.sectors[STAT_WRITE],
923 (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
3ad5cee5 924 inflight,
ea18e0f0 925 jiffies_to_msecs(stat.io_ticks),
8cd5b8fc
KK
926 (unsigned int)div_u64(stat.nsecs[STAT_READ] +
927 stat.nsecs[STAT_WRITE] +
928 stat.nsecs[STAT_DISCARD] +
929 stat.nsecs[STAT_FLUSH],
930 NSEC_PER_MSEC),
ea18e0f0
KK
931 stat.ios[STAT_DISCARD],
932 stat.merges[STAT_DISCARD],
933 (unsigned long long)stat.sectors[STAT_DISCARD],
934 (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
935 stat.ios[STAT_FLUSH],
936 (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
3ad5cee5
CH
937}
938
939ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
940 char *buf)
941{
0d02129e
CH
942 struct block_device *bdev = dev_to_bdev(dev);
943 struct request_queue *q = bdev->bd_disk->queue;
3ad5cee5
CH
944 unsigned int inflight[2];
945
b2f609e1 946 if (queue_is_mq(q))
0d02129e 947 blk_mq_in_flight_rw(q, bdev, inflight);
b2f609e1 948 else
0d02129e 949 part_in_flight_rw(bdev, inflight);
b2f609e1 950
3ad5cee5
CH
951 return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
952}
953
edfaa7c3
KS
954static ssize_t disk_capability_show(struct device *dev,
955 struct device_attribute *attr, char *buf)
86ce18d7 956{
edfaa7c3
KS
957 struct gendisk *disk = dev_to_disk(dev);
958
959 return sprintf(buf, "%x\n", disk->flags);
86ce18d7 960}
edfaa7c3 961
c72758f3
MP
962static ssize_t disk_alignment_offset_show(struct device *dev,
963 struct device_attribute *attr,
964 char *buf)
965{
966 struct gendisk *disk = dev_to_disk(dev);
967
968 return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
969}
970
86b37281
MP
971static ssize_t disk_discard_alignment_show(struct device *dev,
972 struct device_attribute *attr,
973 char *buf)
974{
975 struct gendisk *disk = dev_to_disk(dev);
976
dd3d145d 977 return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
86b37281
MP
978}
979
5657a819
JP
980static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
981static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
982static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
983static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
984static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
985static DEVICE_ATTR(size, 0444, part_size_show, NULL);
986static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
987static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
988static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
989static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
990static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
991static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
3ad5cee5 992
c17bb495 993#ifdef CONFIG_FAIL_MAKE_REQUEST
3ad5cee5
CH
994ssize_t part_fail_show(struct device *dev,
995 struct device_attribute *attr, char *buf)
996{
0d02129e 997 return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
3ad5cee5
CH
998}
999
1000ssize_t part_fail_store(struct device *dev,
1001 struct device_attribute *attr,
1002 const char *buf, size_t count)
1003{
3ad5cee5
CH
1004 int i;
1005
1006 if (count > 0 && sscanf(buf, "%d", &i) > 0)
0d02129e 1007 dev_to_bdev(dev)->bd_make_it_fail = i;
3ad5cee5
CH
1008
1009 return count;
1010}
1011
edfaa7c3 1012static struct device_attribute dev_attr_fail =
5657a819 1013 __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
3ad5cee5
CH
1014#endif /* CONFIG_FAIL_MAKE_REQUEST */
1015
581d4e28
JA
#ifdef CONFIG_FAIL_IO_TIMEOUT
/* "io-timeout-fail" attribute; only built with CONFIG_FAIL_IO_TIMEOUT. */
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif /* CONFIG_FAIL_IO_TIMEOUT */
edfaa7c3
KS
1020
1021static struct attribute *disk_attrs[] = {
1022 &dev_attr_range.attr,
1f014290 1023 &dev_attr_ext_range.attr,
edfaa7c3 1024 &dev_attr_removable.attr,
8ddcd653 1025 &dev_attr_hidden.attr,
1c9ce527 1026 &dev_attr_ro.attr,
edfaa7c3 1027 &dev_attr_size.attr,
c72758f3 1028 &dev_attr_alignment_offset.attr,
86b37281 1029 &dev_attr_discard_alignment.attr,
edfaa7c3
KS
1030 &dev_attr_capability.attr,
1031 &dev_attr_stat.attr,
316d315b 1032 &dev_attr_inflight.attr,
99e6608c 1033 &dev_attr_badblocks.attr,
2bc8cda5
CH
1034 &dev_attr_events.attr,
1035 &dev_attr_events_async.attr,
1036 &dev_attr_events_poll_msecs.attr,
edfaa7c3
KS
1037#ifdef CONFIG_FAIL_MAKE_REQUEST
1038 &dev_attr_fail.attr,
581d4e28
JA
1039#endif
1040#ifdef CONFIG_FAIL_IO_TIMEOUT
1041 &dev_attr_fail_timeout.attr,
edfaa7c3
KS
1042#endif
1043 NULL
1044};
1045
9438b3e0
DW
1046static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
1047{
1048 struct device *dev = container_of(kobj, typeof(*dev), kobj);
1049 struct gendisk *disk = dev_to_disk(dev);
1050
1051 if (a == &dev_attr_badblocks.attr && !disk->bb)
1052 return 0;
1053 return a->mode;
1054}
1055
edfaa7c3
KS
1056static struct attribute_group disk_attr_group = {
1057 .attrs = disk_attrs,
9438b3e0 1058 .is_visible = disk_visible,
edfaa7c3
KS
1059};
1060
a4dbd674 1061static const struct attribute_group *disk_attr_groups[] = {
edfaa7c3
KS
1062 &disk_attr_group,
1063 NULL
1da177e4
LT
1064};
1065
b5bd357c
LC
1066/**
1067 * disk_release - releases all allocated resources of the gendisk
1068 * @dev: the device representing this disk
1069 *
1070 * This function releases all allocated resources of the gendisk.
1071 *
b5bd357c
LC
1072 * Drivers which used __device_add_disk() have a gendisk with a request_queue
1073 * assigned. Since the request_queue sits on top of the gendisk for these
1074 * drivers we also call blk_put_queue() for them, and we expect the
1075 * request_queue refcount to reach 0 at this point, and so the request_queue
1076 * will also be freed prior to the disk.
e8c7d14a
LC
1077 *
1078 * Context: can sleep
b5bd357c 1079 */
edfaa7c3 1080static void disk_release(struct device *dev)
1da177e4 1081{
edfaa7c3
KS
1082 struct gendisk *disk = dev_to_disk(dev);
1083
e8c7d14a
LC
1084 might_sleep();
1085
7c3f828b
CH
1086 if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
1087 blk_free_ext_minor(MINOR(dev->devt));
77ea887e 1088 disk_release_events(disk);
1da177e4 1089 kfree(disk->random);
a33df75c 1090 xa_destroy(&disk->part_tbl);
958229a7 1091 if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
523e1d39 1092 blk_put_queue(disk->queue);
2f4731dc 1093 iput(disk->part0->bd_inode); /* frees the disk */
1da177e4 1094}
87eb7107
MC
1095
1096static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
1097{
1098 struct gendisk *disk = dev_to_disk(dev);
1099
1100 return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
1101}
1102
edfaa7c3
KS
1103struct class block_class = {
1104 .name = "block",
87eb7107 1105 .dev_uevent = block_uevent,
1da177e4
LT
1106};
1107
3c2670e6 1108static char *block_devnode(struct device *dev, umode_t *mode,
4e4098a3 1109 kuid_t *uid, kgid_t *gid)
b03f38b6
KS
1110{
1111 struct gendisk *disk = dev_to_disk(dev);
1112
348e114b
CH
1113 if (disk->fops->devnode)
1114 return disk->fops->devnode(disk, mode);
b03f38b6
KS
1115 return NULL;
1116}
1117
ef45fe47 1118const struct device_type disk_type = {
edfaa7c3
KS
1119 .name = "disk",
1120 .groups = disk_attr_groups,
1121 .release = disk_release,
e454cea2 1122 .devnode = block_devnode,
1da177e4
LT
1123};
1124
a6e2ba88 1125#ifdef CONFIG_PROC_FS
cf771cb5
TH
1126/*
1127 * aggregate disk stat collector. Uses the same stats that the sysfs
1128 * entries do, above, but makes them available through one seq_file.
1129 *
1130 * The output looks suspiciously like /proc/partitions with a bunch of
1131 * extra fields.
1132 */
1133static int diskstats_show(struct seq_file *seqf, void *v)
1da177e4
LT
1134{
1135 struct gendisk *gp = v;
ad1eaa53 1136 struct block_device *hd;
e016b782 1137 unsigned int inflight;
ea18e0f0 1138 struct disk_stats stat;
7fae67cc 1139 unsigned long idx;
1da177e4
LT
1140
1141 /*
ed9e1982 1142 if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
cf771cb5 1143 seq_puts(seqf, "major minor name"
1da177e4
LT
1144 " rio rmerge rsect ruse wio wmerge "
1145 "wsect wuse running use aveq"
1146 "\n\n");
1147 */
9f5e4865 1148
7fae67cc
CH
1149 rcu_read_lock();
1150 xa_for_each(&gp->part_tbl, idx, hd) {
1151 if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
1152 continue;
0d02129e 1153 part_stat_read_all(hd, &stat);
b2f609e1 1154 if (queue_is_mq(gp->queue))
ad1eaa53 1155 inflight = blk_mq_in_flight(gp->queue, hd);
b2f609e1 1156 else
ad1eaa53 1157 inflight = part_in_flight(hd);
ea18e0f0 1158
26e2d7a3 1159 seq_printf(seqf, "%4d %7d %pg "
bdca3c87
MC
1160 "%lu %lu %lu %u "
1161 "%lu %lu %lu %u "
1162 "%u %u %u "
b6866318
KK
1163 "%lu %lu %lu %u "
1164 "%lu %u"
1165 "\n",
26e2d7a3 1166 MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
ea18e0f0
KK
1167 stat.ios[STAT_READ],
1168 stat.merges[STAT_READ],
1169 stat.sectors[STAT_READ],
1170 (unsigned int)div_u64(stat.nsecs[STAT_READ],
1171 NSEC_PER_MSEC),
1172 stat.ios[STAT_WRITE],
1173 stat.merges[STAT_WRITE],
1174 stat.sectors[STAT_WRITE],
1175 (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
1176 NSEC_PER_MSEC),
e016b782 1177 inflight,
ea18e0f0 1178 jiffies_to_msecs(stat.io_ticks),
8cd5b8fc
KK
1179 (unsigned int)div_u64(stat.nsecs[STAT_READ] +
1180 stat.nsecs[STAT_WRITE] +
1181 stat.nsecs[STAT_DISCARD] +
1182 stat.nsecs[STAT_FLUSH],
1183 NSEC_PER_MSEC),
ea18e0f0
KK
1184 stat.ios[STAT_DISCARD],
1185 stat.merges[STAT_DISCARD],
1186 stat.sectors[STAT_DISCARD],
1187 (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
1188 NSEC_PER_MSEC),
1189 stat.ios[STAT_FLUSH],
1190 (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
1191 NSEC_PER_MSEC)
28f39d55 1192 );
1da177e4 1193 }
7fae67cc 1194 rcu_read_unlock();
9f5e4865 1195
1da177e4
LT
1196 return 0;
1197}
1198
31d85ab2 1199static const struct seq_operations diskstats_op = {
def4e38d
TH
1200 .start = disk_seqf_start,
1201 .next = disk_seqf_next,
1202 .stop = disk_seqf_stop,
1da177e4
LT
1203 .show = diskstats_show
1204};
f500975a
AD
1205
1206static int __init proc_genhd_init(void)
1207{
fddda2b7
CH
1208 proc_create_seq("diskstats", 0, NULL, &diskstats_op);
1209 proc_create_seq("partitions", 0, NULL, &partitions_op);
f500975a
AD
1210 return 0;
1211}
1212module_init(proc_genhd_init);
a6e2ba88 1213#endif /* CONFIG_PROC_FS */
1da177e4 1214
c97d93c3
CH
1215dev_t part_devt(struct gendisk *disk, u8 partno)
1216{
0e0ccdec 1217 struct block_device *part;
c97d93c3
CH
1218 dev_t devt = 0;
1219
0e0ccdec
CH
1220 rcu_read_lock();
1221 part = xa_load(&disk->part_tbl, partno);
1222 if (part)
c97d93c3 1223 devt = part->bd_dev;
0e0ccdec 1224 rcu_read_unlock();
c97d93c3
CH
1225
1226 return devt;
1227}
1228
cf771cb5 1229dev_t blk_lookup_devt(const char *name, int partno)
a142be85 1230{
def4e38d
TH
1231 dev_t devt = MKDEV(0, 0);
1232 struct class_dev_iter iter;
1233 struct device *dev;
a142be85 1234
def4e38d
TH
1235 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1236 while ((dev = class_dev_iter_next(&iter))) {
a142be85 1237 struct gendisk *disk = dev_to_disk(dev);
a142be85 1238
3ada8b7e 1239 if (strcmp(dev_name(dev), name))
f331c029 1240 continue;
f331c029 1241
41b8c853
NB
1242 if (partno < disk->minors) {
1243 /* We need to return the right devno, even
1244 * if the partition doesn't exist yet.
1245 */
1246 devt = MKDEV(MAJOR(dev->devt),
1247 MINOR(dev->devt) + partno);
c97d93c3
CH
1248 } else {
1249 devt = part_devt(disk, partno);
1250 if (devt)
1251 break;
def4e38d 1252 }
5c0ef6d0 1253 }
def4e38d 1254 class_dev_iter_exit(&iter);
edfaa7c3
KS
1255 return devt;
1256}
edfaa7c3 1257
e319e1fb 1258struct gendisk *__alloc_disk_node(int minors, int node_id)
1946089a
CL
1259{
1260 struct gendisk *disk;
1261
c1b511eb 1262 disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
f93af2a4
CH
1263 if (!disk)
1264 return NULL;
6c23a968 1265
cb8432d6
CH
1266 disk->part0 = bdev_alloc(disk, 0);
1267 if (!disk->part0)
22ae8ce8
CH
1268 goto out_free_disk;
1269
f93af2a4 1270 disk->node_id = node_id;
a8698707 1271 mutex_init(&disk->open_mutex);
a33df75c
CH
1272 xa_init(&disk->part_tbl);
1273 if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
1274 goto out_destroy_part_tbl;
f93af2a4
CH
1275
1276 disk->minors = minors;
1277 rand_initialize_disk(disk);
1278 disk_to_dev(disk)->class = &block_class;
1279 disk_to_dev(disk)->type = &disk_type;
1280 device_initialize(disk_to_dev(disk));
cf179948
MC
1281 inc_diskseq(disk);
1282
1da177e4 1283 return disk;
f93af2a4 1284
a33df75c
CH
1285out_destroy_part_tbl:
1286 xa_destroy(&disk->part_tbl);
2f4731dc 1287 iput(disk->part0->bd_inode);
f93af2a4
CH
1288out_free_disk:
1289 kfree(disk);
1290 return NULL;
1da177e4 1291}
e319e1fb 1292EXPORT_SYMBOL(__alloc_disk_node);
1da177e4 1293
f525464a
CH
1294struct gendisk *__blk_alloc_disk(int node)
1295{
1296 struct request_queue *q;
1297 struct gendisk *disk;
1298
1299 q = blk_alloc_queue(node);
1300 if (!q)
1301 return NULL;
1302
1303 disk = __alloc_disk_node(0, node);
1304 if (!disk) {
1305 blk_cleanup_queue(q);
1306 return NULL;
1307 }
1308 disk->queue = q;
1309 return disk;
1310}
1311EXPORT_SYMBOL(__blk_alloc_disk);
1312
b5bd357c
LC
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for, may be NULL
 *
 * This decrements the refcount for the struct gendisk. When this reaches 0
 * we'll have disk_release() called. A NULL @disk is ignored.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1329
f525464a
CH
1330/**
1331 * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
1332 * @disk: gendisk to shutdown
1333 *
1334 * Mark the queue hanging off @disk DYING, drain all pending requests, then mark
1335 * the queue DEAD, destroy and put it and the gendisk structure.
1336 *
1337 * Context: can sleep
1338 */
1339void blk_cleanup_disk(struct gendisk *disk)
1340{
1341 blk_cleanup_queue(disk->queue);
1342 put_disk(disk);
1343}
1344EXPORT_SYMBOL(blk_cleanup_disk);
1345
e3264a4d
HR
1346static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1347{
1348 char event[] = "DISK_RO=1";
1349 char *envp[] = { event, NULL };
1350
1351 if (!ro)
1352 event[8] = '0';
1353 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1354}
1355
52f019d4
CH
1356/**
1357 * set_disk_ro - set a gendisk read-only
1358 * @disk: gendisk to operate on
7f31bee3 1359 * @read_only: %true to set the disk read-only, %false set the disk read/write
52f019d4
CH
1360 *
1361 * This function is used to indicate whether a given disk device should have its
1362 * read-only flag set. set_disk_ro() is typically used by device drivers to
1363 * indicate whether the underlying physical device is write-protected.
1364 */
1365void set_disk_ro(struct gendisk *disk, bool read_only)
1da177e4 1366{
52f019d4
CH
1367 if (read_only) {
1368 if (test_and_set_bit(GD_READ_ONLY, &disk->state))
1369 return;
1370 } else {
1371 if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
1372 return;
e3264a4d 1373 }
52f019d4 1374 set_disk_ro_uevent(disk, read_only);
1da177e4 1375}
1da177e4
LT
1376EXPORT_SYMBOL(set_disk_ro);
1377
1378int bdev_read_only(struct block_device *bdev)
1379{
947139bf 1380 return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
1da177e4 1381}
1da177e4 1382EXPORT_SYMBOL(bdev_read_only);
cf179948
MC
1383
1384void inc_diskseq(struct gendisk *disk)
1385{
1386 disk->diskseq = atomic64_inc_return(&diskseq);
1387}