ubd: pass queue_limits to blk_mq_alloc_disk
[linux-2.6-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
dbddf429 1// SPDX-License-Identifier: GPL-2.0
6c29256c 2/*
a41421ed 3 * Copyright (C) 2018 Cambridge Greys Ltd
f88f0bdf 4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
1da177e4 5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
1da177e4
LT
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
1da177e4
LT
22#define UBD_SHIFT 4
23
8ea3c06a
AV
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
4e6da0fe 27#include <linux/blk-mq.h>
8ea3c06a
AV
28#include <linux/ata.h>
29#include <linux/hdreg.h>
b81e0c23 30#include <linux/major.h>
8ea3c06a
AV
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
39#include <asm/tlbflush.h>
37185b33 40#include <kern_util.h>
1da177e4 41#include "mconsole_kern.h"
37185b33
AV
42#include <init.h>
43#include <irq_kern.h>
8ea3c06a 44#include "ubd.h"
37185b33 45#include <os.h>
1da177e4
LT
46#include "cow.h"
47
a43c8316
AI
48/* Max request size is determined by sector mask - 32K */
49#define UBD_MAX_REQUEST (8 * sizeof(long))
1da177e4 50
fc6b6a87
GKB
/*
 * Descriptor for one data segment carried by a struct io_thread_req.
 * NOTE(review): the code that fills and consumes these fields is not in
 * this part of the file; the per-field notes are read off the names and
 * should be confirmed against the users.
 */
struct io_desc {
	char *buffer;			/* data buffer of the segment */
	unsigned long length;		/* length of the segment */
	unsigned long sector_mask;	/* presumably one bit per sector - confirm */
	unsigned long long cow_offset;	/* presumably COW bitmap file offset - confirm */
	unsigned long bitmap_words[2];
};
58
1da177e4 59struct io_thread_req {
62f96cb0 60 struct request *req;
1da177e4
LT
61 int fds[2];
62 unsigned long offsets[2];
63 unsigned long long offset;
1da177e4 64 int sectorsize;
1da177e4 65 int error;
fc6b6a87
GKB
66
67 int desc_cnt;
68 /* io_desc has to be the last element of the struct */
69 struct io_desc io_desc[];
1da177e4
LT
70};
71
f88f0bdf
AI
72
/*
 * Bulk buffers of request pointers, each paired with storage for the
 * trailing bytes of a partially read pointer (see bulk_req_safe_read()).
 * The irq_* set is drained by ubd_handler(); both buffers are allocated
 * in ubd_init().
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
80
81
82
/*
 * Return 1 if bit number @bit is set in the byte array @data, 0 otherwise.
 * Bits are numbered from the least significant bit of data[0] upwards.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] >> shift) & 1;
}
93
/*
 * Set bit number @bit in the byte array @data.  Bits are numbered from the
 * least significant bit of data[0] upwards; other bits are left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
104/*End stuff from ubd_user.h*/
105
106#define DRIVER_NAME "uml-blkdev"
107
d7fb2c38 108static DEFINE_MUTEX(ubd_lock);
9a181c58 109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
1da177e4 110
05bdb996 111static int ubd_open(struct gendisk *disk, blk_mode_t mode);
ae220766 112static void ubd_release(struct gendisk *disk);
05bdb996 113static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4 114 unsigned int cmd, unsigned long arg);
a885c8c4 115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 116
97d88ac8 117#define MAX_DEV (16)
1da177e4 118
83d5cde4 119static const struct block_device_operations ubd_blops = {
1da177e4 120 .owner = THIS_MODULE,
a625c998
AV
121 .open = ubd_open,
122 .release = ubd_release,
123 .ioctl = ubd_ioctl,
ab0cf1e4 124 .compat_ioctl = blkdev_compat_ptr_ioctl,
a885c8c4 125 .getgeo = ubd_getgeo,
1da177e4
LT
126};
127
1da177e4 128/* Protected by ubd_lock */
1da177e4 129static struct gendisk *ubd_gendisk[MAX_DEV];
6c29256c 130
1da177e4
LT
131#ifdef CONFIG_BLK_DEV_UBD_SYNC
132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
133 .cl = 1 })
134#else
135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
136 .cl = 1 })
137#endif
1da177e4
LT
138static struct openflags global_openflags = OPEN_FLAGS;
139
/* Copy-on-write state of a device: the backing file and the COW bitmap. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* bitmap read from the COW file; vmalloc()ed in ubd_open_dev() */
	unsigned long *bitmap;
	unsigned long bitmap_len;
	int bitmap_offset;
	int data_offset;
};
150
a0044bdf
JD
151#define MAX_SG 64
152
1da177e4 153struct ubd {
2a9d32f6
PBG
154 /* name (and fd, below) of the file opened for writing, either the
155 * backing or the cow file. */
1da177e4 156 char *file;
ef3ba87c 157 char *serial;
1da177e4
LT
158 int count;
159 int fd;
160 __u64 size;
161 struct openflags boot_openflags;
162 struct openflags openflags;
84e945e3
PBG
163 unsigned shared:1;
164 unsigned no_cow:1;
50109b5a 165 unsigned no_trim:1;
1da177e4
LT
166 struct cow cow;
167 struct platform_device pdev;
62f96cb0 168 struct request_queue *queue;
4e6da0fe 169 struct blk_mq_tag_set tag_set;
62f96cb0 170 spinlock_t lock;
4e6da0fe
RW
171};
172
1da177e4
LT
173#define DEFAULT_COW { \
174 .file = NULL, \
dc764e50
JD
175 .fd = -1, \
176 .bitmap = NULL, \
1da177e4 177 .bitmap_offset = 0, \
dc764e50 178 .data_offset = 0, \
1da177e4
LT
179}
180
181#define DEFAULT_UBD { \
182 .file = NULL, \
ef3ba87c 183 .serial = NULL, \
1da177e4
LT
184 .count = 0, \
185 .fd = -1, \
186 .size = -1, \
187 .boot_openflags = OPEN_FLAGS, \
188 .openflags = OPEN_FLAGS, \
dc764e50 189 .no_cow = 0, \
50109b5a 190 .no_trim = 0, \
6c29256c 191 .shared = 0, \
dc764e50 192 .cow = DEFAULT_COW, \
22e65004 193 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
1da177e4
LT
194}
195
b8831a1d 196/* Protected by ubd_lock */
5dc62b1b 197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 198
4e6da0fe
RW
199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
200 const struct blk_mq_queue_data *bd);
4e6da0fe 201
1da177e4
LT
202static int fake_ide_setup(char *str)
203{
7eb90f7e 204 pr_warn("The fake_ide option has been removed\n");
dc764e50 205 return 1;
1da177e4 206}
1da177e4
LT
207__setup("fake_ide", fake_ide_setup);
208
209__uml_help(fake_ide_setup,
210"fake_ide\n"
7eb90f7e 211" Obsolete stub.\n\n"
1da177e4
LT
212);
213
/*
 * Parse a unit designator at *ptr: either a number (as understood by
 * simple_strtoul() with base 0) or a single letter 'a'..'z' mapped to
 * 0..25.  On success *ptr is advanced past the designator and the unit
 * number is returned; otherwise -1 is returned and *ptr is unchanged.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
232
d8d7c28e
PBG
233/* If *index_out == -1 at exit, the passed option was a general one;
234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
235 * should not be freed on exit.
236 */
f28169d2 237static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 238{
7d314e34 239 struct ubd *ubd_dev;
1da177e4 240 struct openflags flags = global_openflags;
ef3ba87c 241 char *file, *backing_file, *serial;
b8831a1d 242 int n, err = 0, i;
1da177e4
LT
243
244 if(index_out) *index_out = -1;
245 n = *str;
246 if(n == '='){
1da177e4 247 str++;
1da177e4
LT
248 if(!strcmp(str, "sync")){
249 global_openflags = of_sync(global_openflags);
9ca55299 250 return err;
1da177e4 251 }
b8831a1d 252
7eb90f7e
CH
253 pr_warn("fake major not supported any more\n");
254 return 0;
1da177e4
LT
255 }
256
257 n = parse_unit(&str);
258 if(n < 0){
f28169d2
JD
259 *error_out = "Couldn't parse device number";
260 return -EINVAL;
1da177e4
LT
261 }
262 if(n >= MAX_DEV){
f28169d2
JD
263 *error_out = "Device number out of range";
264 return 1;
1da177e4
LT
265 }
266
f28169d2 267 err = -EBUSY;
d7fb2c38 268 mutex_lock(&ubd_lock);
1da177e4 269
7d314e34
PBG
270 ubd_dev = &ubd_devs[n];
271 if(ubd_dev->file != NULL){
f28169d2 272 *error_out = "Device is already configured";
1da177e4
LT
273 goto out;
274 }
275
276 if (index_out)
277 *index_out = n;
278
f28169d2 279 err = -EINVAL;
50109b5a 280 for (i = 0; i < sizeof("rscdt="); i++) {
1da177e4
LT
281 switch (*str) {
282 case 'r':
283 flags.w = 0;
284 break;
285 case 's':
286 flags.s = 1;
287 break;
288 case 'd':
7d314e34 289 ubd_dev->no_cow = 1;
1da177e4 290 break;
6c29256c 291 case 'c':
7d314e34 292 ubd_dev->shared = 1;
6c29256c 293 break;
50109b5a
AI
294 case 't':
295 ubd_dev->no_trim = 1;
296 break;
1da177e4
LT
297 case '=':
298 str++;
299 goto break_loop;
300 default:
f28169d2 301 *error_out = "Expected '=' or flag letter "
50109b5a 302 "(r, s, c, t or d)";
1da177e4
LT
303 goto out;
304 }
305 str++;
306 }
307
f28169d2
JD
308 if (*str == '=')
309 *error_out = "Too many flags specified";
310 else
311 *error_out = "Missing '='";
1da177e4
LT
312 goto out;
313
314break_loop:
ef3ba87c
CO
315 file = strsep(&str, ",:");
316 if (*file == '\0')
317 file = NULL;
1da177e4 318
ef3ba87c 319 backing_file = strsep(&str, ",:");
94c41b3a 320 if (backing_file && *backing_file == '\0')
ef3ba87c 321 backing_file = NULL;
1da177e4 322
ef3ba87c 323 serial = strsep(&str, ",:");
94c41b3a 324 if (serial && *serial == '\0')
ef3ba87c
CO
325 serial = NULL;
326
327 if (backing_file && ubd_dev->no_cow) {
328 *error_out = "Can't specify both 'd' and a cow file";
329 goto out;
1da177e4 330 }
ef3ba87c 331
f28169d2 332 err = 0;
ef3ba87c 333 ubd_dev->file = file;
7d314e34 334 ubd_dev->cow.file = backing_file;
ef3ba87c 335 ubd_dev->serial = serial;
7d314e34 336 ubd_dev->boot_openflags = flags;
1da177e4 337out:
d7fb2c38 338 mutex_unlock(&ubd_lock);
f28169d2 339 return err;
1da177e4
LT
340}
341
342static int ubd_setup(char *str)
343{
f28169d2
JD
344 char *error;
345 int err;
346
347 err = ubd_setup_common(str, NULL, &error);
348 if(err)
349 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
350 "%s\n", str, error);
351 return 1;
1da177e4
LT
352}
353
354__setup("ubd", ubd_setup);
355__uml_help(ubd_setup,
ef3ba87c 356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
1da177e4
LT
357" This is used to associate a device with a file in the underlying\n"
358" filesystem. When specifying two filenames, the first one is the\n"
359" COW name and the second is the backing file name. As separator you can\n"
360" use either a ':' or a ',': the first one allows writing things like;\n"
361" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
362" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 363" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
364" a COW file or a backing file. To override this detection, add the 'd'\n"
365" flag:\n"
366" ubd0d=BackingFile\n"
367" Usually, there is a filesystem in the file, but \n"
368" that's not required. Swap devices containing swap files can be\n"
369" specified like this. Also, a file which doesn't contain a\n"
370" filesystem can have its contents read in the virtual \n"
371" machine by running 'dd' on the device. <n> must be in the range\n"
" 0 to 15. Appending an 'r' to the number will cause that device\n"
373" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
374" an 's' will cause data to be written to disk on the host immediately.\n"
375" 'c' will cause the device to be treated as being shared between multiple\n"
376" UMLs and file locking will be turned off - this is appropriate for a\n"
377" cluster filesystem and inappropriate at almost all other times.\n\n"
50109b5a 378" 't' will disable trim/discard support on the device (enabled by default).\n\n"
ef3ba87c
CO
379" An optional device serial number can be exposed using the serial parameter\n"
380" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
381" useful when a unique number should be given to the device. Note when\n"
382" specifying a label, the filename2 must be also presented. It can be\n"
383" an empty string, in which case the backing file is not used:\n"
384" ubd0=File,,Serial\n"
1da177e4
LT
385);
386
8299ca5c 387static int udb_setup(char *str)
1da177e4
LT
388{
389 printk("udb%s specified on command line is almost certainly a ubd -> "
390 "udb TYPO\n", str);
dc764e50 391 return 1;
1da177e4
LT
392}
393
394__setup("udb", udb_setup);
395__uml_help(udb_setup,
396"udb\n"
0894e27e
JD
397" This option is here solely to catch ubd -> udb typos, which can be\n"
398" to impossible to catch visually unless you specifically look for\n"
399" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
400" in the boot output.\n\n"
401);
402
/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;

/*
 * Read several request pointers at a time from @fd, handling fractional
 * reads if (and as) needed.
 *
 * @request_buffer: destination array with room for @max_recs pointers.
 * @remainder:      storage for the trailing bytes of a partially read
 *                  pointer, carried over to the next call.
 * @remainder_size: number of valid bytes in @remainder; updated here.
 * @max_recs:       capacity of @request_buffer in pointers.
 *
 * Returns the number of bytes of complete pointers now at the start of
 * @request_buffer (possibly 0), or the os_read_file() result when that is
 * <= 0.  In the latter case the carried remainder is left untouched.
 *
 * *remainder_size is recomputed after every successful read, so carried
 * bytes that have been merged into a complete pointer are not prepended
 * again on the following call.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const int rec_size = sizeof(struct io_thread_req *);
	char *buf = (char *) request_buffer;
	int carried = *remainder_size;
	int n = 0;
	int res;

	/* Start the buffer with the bytes left over from the previous call. */
	if (carried > 0) {
		memmove(buf, (char *) remainder, carried);
		n = carried;
	}

	res = os_read_file(fd, buf + carried, rec_size * max_recs - carried);
	if (res <= 0)
		return res;

	n += res;
	*remainder_size = n % rec_size;
	if (*remainder_size > 0) {
		/*
		 * Read somehow returned not a multiple of the pointer size:
		 * theoretically possible, but never observed in the wild,
		 * so the read routine must be able to handle it.
		 */
		WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
		memmove(remainder, buf + (n - *remainder_size),
			*remainder_size);
		n -= *remainder_size;
	}
	return n;
}
458
62f96cb0 459/* Called without dev->lock held, and only in interrupt context. */
91acb21f 460static void ubd_handler(void)
1da177e4 461{
91acb21f 462 int n;
f88f0bdf 463 int count;
91acb21f 464
a0044bdf 465 while(1){
f88f0bdf
AI
466 n = bulk_req_safe_read(
467 thread_fd,
468 irq_req_buffer,
469 &irq_remainder,
470 &irq_remainder_size,
471 UBD_REQ_BUFFER_SIZE
472 );
473 if (n < 0) {
a0044bdf
JD
474 if(n == -EAGAIN)
475 break;
476 printk(KERN_ERR "spurious interrupt in ubd_handler, "
477 "err = %d\n", -n);
478 return;
479 }
f88f0bdf 480 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
4e6da0fe 481 struct io_thread_req *io_req = (*irq_req_buffer)[count];
4e6da0fe 482
50109b5a
AI
483 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
484 blk_queue_max_discard_sectors(io_req->req->q, 0);
485 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
50109b5a 486 }
fc6b6a87 487 blk_mq_end_request(io_req->req, io_req->error);
4e6da0fe 488 kfree(io_req);
f88f0bdf 489 }
a0044bdf 490 }
1da177e4
LT
491}
492
7bea96fd 493static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 494{
91acb21f 495 ubd_handler();
dc764e50 496 return IRQ_HANDLED;
91acb21f 497}
09ace81c 498
91acb21f
JD
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread process if one was recorded in io_pid. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
509
d8d7c28e 510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
511{
512 char *file;
85356398
RW
513 int fd;
514 int err;
515
516 __u32 version;
517 __u32 align;
518 char *backing_file;
853bc0ab 519 time64_t mtime;
85356398
RW
520 unsigned long long size;
521 int sector_size;
522 int bitmap_offset;
523
524 if (ubd_dev->file && ubd_dev->cow.file) {
525 file = ubd_dev->cow.file;
526
527 goto out;
528 }
529
d4afcba9 530 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
531 if (fd < 0)
532 return fd;
533
534 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
535 &mtime, &size, &sector_size, &align, &bitmap_offset);
536 os_close_file(fd);
1da177e4 537
85356398
RW
538 if(err == -EINVAL)
539 file = ubd_dev->file;
540 else
541 file = backing_file;
542
543out:
dc764e50 544 return os_file_size(file, size_out);
1da177e4
LT
545}
546
5dc62b1b
WC
/*
 * Read @len bytes at @offset of @fd into @buf using os_pread_file().
 * Returns 0 unless the read reports a negative error, which is passed on.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
557
853bc0ab 558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
5dc62b1b 559{
853bc0ab 560 time64_t modtime;
5dc62b1b
WC
561 unsigned long long actual;
562 int err;
563
564 err = os_file_modtime(file, &modtime);
565 if (err < 0) {
566 printk(KERN_ERR "Failed to get modification time of backing "
567 "file \"%s\", err = %d\n", file, -err);
568 return err;
569 }
570
571 err = os_file_size(file, &actual);
572 if (err < 0) {
573 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
574 "err = %d\n", file, -err);
575 return err;
576 }
577
578 if (actual != size) {
579 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
580 * the typecast.*/
581 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
582 "vs backing file\n", (unsigned long long) size, actual);
583 return -EINVAL;
584 }
585 if (modtime != mtime) {
853bc0ab 586 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
5dc62b1b
WC
587 "backing file\n", mtime, modtime);
588 return -EINVAL;
589 }
590 return 0;
591}
592
593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
594{
595 struct uml_stat buf1, buf2;
596 int err;
597
598 if (from_cmdline == NULL)
599 return 0;
600 if (!strcmp(from_cmdline, from_cow))
601 return 0;
602
603 err = os_stat_file(from_cmdline, &buf1);
604 if (err < 0) {
605 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
606 -err);
607 return 0;
608 }
609 err = os_stat_file(from_cow, &buf2);
610 if (err < 0) {
611 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
612 -err);
613 return 1;
614 }
615 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
616 return 0;
617
618 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
619 "\"%s\" specified in COW header of \"%s\"\n",
620 from_cmdline, from_cow, cow);
621 return 1;
622}
623
624static int open_ubd_file(char *file, struct openflags *openflags, int shared,
625 char **backing_file_out, int *bitmap_offset_out,
626 unsigned long *bitmap_len_out, int *data_offset_out,
627 int *create_cow_out)
628{
853bc0ab 629 time64_t mtime;
5dc62b1b
WC
630 unsigned long long size;
631 __u32 version, align;
632 char *backing_file;
633 int fd, err, sectorsize, asked_switch, mode = 0644;
634
635 fd = os_open_file(file, *openflags, mode);
636 if (fd < 0) {
637 if ((fd == -ENOENT) && (create_cow_out != NULL))
638 *create_cow_out = 1;
639 if (!openflags->w ||
640 ((fd != -EROFS) && (fd != -EACCES)))
641 return fd;
642 openflags->w = 0;
643 fd = os_open_file(file, *openflags, mode);
644 if (fd < 0)
645 return fd;
646 }
647
648 if (shared)
649 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
650 else {
651 err = os_lock_file(fd, openflags->w);
652 if (err < 0) {
653 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
654 file, -err);
655 goto out_close;
656 }
657 }
658
659 /* Successful return case! */
660 if (backing_file_out == NULL)
661 return fd;
662
663 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
664 &size, &sectorsize, &align, bitmap_offset_out);
665 if (err && (*backing_file_out != NULL)) {
666 printk(KERN_ERR "Failed to read COW header from COW file "
667 "\"%s\", errno = %d\n", file, -err);
668 goto out_close;
669 }
670 if (err)
671 return fd;
672
673 asked_switch = path_requires_switch(*backing_file_out, backing_file,
674 file);
675
676 /* Allow switching only if no mismatch. */
677 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
678 mtime)) {
679 printk(KERN_ERR "Switching backing file to '%s'\n",
680 *backing_file_out);
681 err = write_cow_header(file, fd, *backing_file_out,
682 sectorsize, align, &size);
683 if (err) {
684 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
685 goto out_close;
686 }
687 } else {
688 *backing_file_out = backing_file;
689 err = backing_file_mismatch(*backing_file_out, size, mtime);
690 if (err)
691 goto out_close;
692 }
693
694 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
695 bitmap_len_out, data_offset_out);
696
697 return fd;
698 out_close:
699 os_close_file(fd);
700 return err;
701}
702
703static int create_cow_file(char *cow_file, char *backing_file,
704 struct openflags flags,
705 int sectorsize, int alignment, int *bitmap_offset_out,
706 unsigned long *bitmap_len_out, int *data_offset_out)
707{
708 int err, fd;
709
710 flags.c = 1;
711 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
712 if (fd < 0) {
713 err = fd;
714 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
715 cow_file, -err);
716 goto out;
717 }
718
719 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
720 bitmap_offset_out, bitmap_len_out,
721 data_offset_out);
722 if (!err)
723 return fd;
724 os_close_file(fd);
725 out:
726 return err;
727}
728
5f75a4f8 729static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 730{
7d314e34
PBG
731 os_close_file(ubd_dev->fd);
732 if(ubd_dev->cow.file == NULL)
1da177e4
LT
733 return;
734
7d314e34
PBG
735 os_close_file(ubd_dev->cow.fd);
736 vfree(ubd_dev->cow.bitmap);
737 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
738}
739
7d314e34 740static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
741{
742 struct openflags flags;
743 char **back_ptr;
744 int err, create_cow, *create_ptr;
0bf16bff 745 int fd;
1da177e4 746
7d314e34 747 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 748 create_cow = 0;
7d314e34
PBG
749 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
750 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
751
752 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
753 back_ptr, &ubd_dev->cow.bitmap_offset,
754 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 755 create_ptr);
1da177e4 756
0bf16bff
PBG
757 if((fd == -ENOENT) && create_cow){
758 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
a43c8316 759 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
7d314e34
PBG
760 &ubd_dev->cow.bitmap_offset,
761 &ubd_dev->cow.bitmap_len,
762 &ubd_dev->cow.data_offset);
0bf16bff 763 if(fd >= 0){
1da177e4 764 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 765 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
766 }
767 }
768
0bf16bff 769 if(fd < 0){
7d314e34 770 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
771 -fd);
772 return fd;
1da177e4 773 }
0bf16bff 774 ubd_dev->fd = fd;
1da177e4 775
7d314e34 776 if(ubd_dev->cow.file != NULL){
086fa5ff 777 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
f4768ffd 778
1da177e4 779 err = -ENOMEM;
da2486ba 780 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 781 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
782 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
783 goto error;
784 }
785 flush_tlb_kernel_vm();
786
7d314e34
PBG
787 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
788 ubd_dev->cow.bitmap_offset,
789 ubd_dev->cow.bitmap_len);
1da177e4
LT
790 if(err < 0)
791 goto error;
792
7d314e34 793 flags = ubd_dev->openflags;
1da177e4 794 flags.w = 0;
7d314e34 795 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 796 NULL, NULL, NULL, NULL);
1da177e4 797 if(err < 0) goto error;
7d314e34 798 ubd_dev->cow.fd = err;
1da177e4 799 }
50109b5a 800 if (ubd_dev->no_trim == 0) {
50109b5a
AI
801 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
802 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
50109b5a 803 }
a43c8316 804 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
dc764e50 805 return 0;
1da177e4 806 error:
7d314e34 807 os_close_file(ubd_dev->fd);
dc764e50 808 return err;
1da177e4
LT
809}
810
2e3f5251
JD
811static void ubd_device_release(struct device *dev)
812{
8691b97b 813 struct ubd *ubd_dev = dev_get_drvdata(dev);
2e3f5251 814
4e6da0fe 815 blk_mq_free_tag_set(&ubd_dev->tag_set);
2e3f5251
JD
816 *ubd_dev = ((struct ubd) DEFAULT_UBD);
817}
818
ef3ba87c
CO
819static ssize_t serial_show(struct device *dev,
820 struct device_attribute *attr, char *buf)
821{
822 struct gendisk *disk = dev_to_disk(dev);
823 struct ubd *ubd_dev = disk->private_data;
824
825 if (!ubd_dev)
826 return 0;
827
828 return sprintf(buf, "%s", ubd_dev->serial);
829}
830
831static DEVICE_ATTR_RO(serial);
832
833static struct attribute *ubd_attrs[] = {
834 &dev_attr_serial.attr,
835 NULL,
836};
837
838static umode_t ubd_attrs_are_visible(struct kobject *kobj,
839 struct attribute *a, int n)
840{
841 return a->mode;
842}
843
844static const struct attribute_group ubd_attr_group = {
845 .attrs = ubd_attrs,
846 .is_visible = ubd_attrs_are_visible,
847};
848
849static const struct attribute_group *ubd_attr_groups[] = {
850 &ubd_attr_group,
851 NULL,
852};
853
66638f16
LC
854static int ubd_disk_register(int major, u64 size, int unit,
855 struct gendisk *disk)
1da177e4 856{
1da177e4
LT
857 disk->major = major;
858 disk->first_minor = unit << UBD_SHIFT;
35efb594 859 disk->minors = 1 << UBD_SHIFT;
1da177e4
LT
860 disk->fops = &ubd_blops;
861 set_capacity(disk, size / 512);
7eb90f7e
CH
862 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
863
864 ubd_devs[unit].pdev.id = unit;
865 ubd_devs[unit].pdev.name = DRIVER_NAME;
866 ubd_devs[unit].pdev.dev.release = ubd_device_release;
867 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
868 platform_device_register(&ubd_devs[unit].pdev);
1da177e4 869
7d314e34 870 disk->private_data = &ubd_devs[unit];
62f96cb0 871 disk->queue = ubd_devs[unit].queue;
66638f16 872 return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
1da177e4
LT
873}
874
/*
 * Round n up to the next multiple of SECTOR_SIZE.  The argument is
 * parenthesized so that expressions built with low-precedence operators
 * expand correctly.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
1da177e4 876
4e6da0fe
RW
877static const struct blk_mq_ops ubd_mq_ops = {
878 .queue_rq = ubd_queue_rq,
4e6da0fe
RW
879};
880
f28169d2 881static int ubd_add(int n, char **error_out)
1da177e4 882{
7d314e34 883 struct ubd *ubd_dev = &ubd_devs[n];
5d6789ce
CH
884 struct queue_limits lim = {
885 .max_segments = MAX_SG,
886 .seg_boundary_mask = PAGE_SIZE - 1,
887 };
35efb594 888 struct gendisk *disk;
f28169d2 889 int err = 0;
1da177e4 890
7d314e34 891 if(ubd_dev->file == NULL)
ec7cf783 892 goto out;
1da177e4 893
7d314e34 894 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
895 if(err < 0){
896 *error_out = "Couldn't determine size of device's file";
80c13749 897 goto out;
f28169d2 898 }
1da177e4 899
7d314e34 900 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 901
4e6da0fe
RW
902 ubd_dev->tag_set.ops = &ubd_mq_ops;
903 ubd_dev->tag_set.queue_depth = 64;
904 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
905 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
4e6da0fe
RW
906 ubd_dev->tag_set.driver_data = ubd_dev;
907 ubd_dev->tag_set.nr_hw_queues = 1;
a0044bdf 908
4e6da0fe
RW
909 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
910 if (err)
80c13749 911 goto out;
4e6da0fe 912
5d6789ce 913 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
35efb594
CH
914 if (IS_ERR(disk)) {
915 err = PTR_ERR(disk);
aea05eb5 916 goto out_cleanup_tags;
62f96cb0 917 }
35efb594 918 ubd_dev->queue = disk->queue;
4e6da0fe 919
f935a8ce 920 blk_queue_write_cache(ubd_dev->queue, true, false);
66638f16
LC
921 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
922 if (err)
923 goto out_cleanup_disk;
924
35efb594
CH
925 ubd_gendisk[n] = disk;
926 return 0;
62f96cb0 927
66638f16 928out_cleanup_disk:
8b9ab626 929 put_disk(disk);
4e6da0fe
RW
930out_cleanup_tags:
931 blk_mq_free_tag_set(&ubd_dev->tag_set);
35efb594
CH
932out:
933 return err;
1da177e4
LT
934}
935
f28169d2 936static int ubd_config(char *str, char **error_out)
1da177e4 937{
e7f6552f 938 int n, ret;
1da177e4 939
f28169d2
JD
940 /* This string is possibly broken up and stored, so it's only
941 * freed if ubd_setup_common fails, or if only general options
942 * were set.
943 */
970d6e3a 944 str = kstrdup(str, GFP_KERNEL);
e7f6552f 945 if (str == NULL) {
f28169d2
JD
946 *error_out = "Failed to allocate memory";
947 return -ENOMEM;
1da177e4 948 }
f28169d2
JD
949
950 ret = ubd_setup_common(str, &n, error_out);
951 if (ret)
e7f6552f 952 goto err_free;
f28169d2 953
e7f6552f
PBG
954 if (n == -1) {
955 ret = 0;
d8d7c28e 956 goto err_free;
1da177e4 957 }
1da177e4 958
dc764e50 959 mutex_lock(&ubd_lock);
f28169d2 960 ret = ubd_add(n, error_out);
e7f6552f 961 if (ret)
7d314e34 962 ubd_devs[n].file = NULL;
dc764e50 963 mutex_unlock(&ubd_lock);
1da177e4 964
e7f6552f 965out:
dc764e50 966 return ret;
e7f6552f
PBG
967
968err_free:
969 kfree(str);
970 goto out;
1da177e4
LT
971}
972
973static int ubd_get_config(char *name, char *str, int size, char **error_out)
974{
7d314e34 975 struct ubd *ubd_dev;
1da177e4
LT
976 int n, len = 0;
977
978 n = parse_unit(&name);
979 if((n >= MAX_DEV) || (n < 0)){
980 *error_out = "ubd_get_config : device number out of range";
dc764e50 981 return -1;
1da177e4
LT
982 }
983
7d314e34 984 ubd_dev = &ubd_devs[n];
d7fb2c38 985 mutex_lock(&ubd_lock);
1da177e4 986
7d314e34 987 if(ubd_dev->file == NULL){
1da177e4
LT
988 CONFIG_CHUNK(str, size, len, "", 1);
989 goto out;
990 }
991
7d314e34 992 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 993
7d314e34 994 if(ubd_dev->cow.file != NULL){
1da177e4 995 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 996 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
997 }
998 else CONFIG_CHUNK(str, size, len, "", 1);
999
1000 out:
d7fb2c38 1001 mutex_unlock(&ubd_lock);
dc764e50 1002 return len;
1da177e4
LT
1003}
1004
29d56cfe
JD
1005static int ubd_id(char **str, int *start_out, int *end_out)
1006{
dc764e50 1007 int n;
29d56cfe
JD
1008
1009 n = parse_unit(str);
dc764e50
JD
1010 *start_out = 0;
1011 *end_out = MAX_DEV - 1;
1012 return n;
29d56cfe
JD
1013}
1014
f28169d2 1015static int ubd_remove(int n, char **error_out)
1da177e4 1016{
2e3f5251 1017 struct gendisk *disk = ubd_gendisk[n];
7d314e34 1018 struct ubd *ubd_dev;
29d56cfe 1019 int err = -ENODEV;
1da177e4 1020
d7fb2c38 1021 mutex_lock(&ubd_lock);
1da177e4 1022
7d314e34 1023 ubd_dev = &ubd_devs[n];
1da177e4 1024
7d314e34 1025 if(ubd_dev->file == NULL)
29d56cfe 1026 goto out;
1da177e4 1027
29d56cfe
JD
1028 /* you cannot remove a open disk */
1029 err = -EBUSY;
7d314e34 1030 if(ubd_dev->count > 0)
1da177e4
LT
1031 goto out;
1032
dc764e50 1033 ubd_gendisk[n] = NULL;
b47d2deb
JD
1034 if(disk != NULL){
1035 del_gendisk(disk);
8b9ab626 1036 put_disk(disk);
1da177e4
LT
1037 }
1038
1da177e4 1039 err = 0;
2e3f5251 1040 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1041out:
d7fb2c38 1042 mutex_unlock(&ubd_lock);
29d56cfe 1043 return err;
1da177e4
LT
1044}
1045
f28169d2 1046/* All these are called by mconsole in process context and without
b8831a1d 1047 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1048 */
1da177e4 1049static struct mc_device ubd_mc = {
84f48d4f 1050 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1051 .name = "ubd",
1052 .config = ubd_config,
dc764e50 1053 .get_config = ubd_get_config,
29d56cfe 1054 .id = ubd_id,
1da177e4
LT
1055 .remove = ubd_remove,
1056};
1057
d8d7c28e 1058static int __init ubd_mc_init(void)
1da177e4
LT
1059{
1060 mconsole_register_dev(&ubd_mc);
1061 return 0;
1062}
1063
1064__initcall(ubd_mc_init);
1065
d8d7c28e
PBG
1066static int __init ubd0_init(void)
1067{
1068 struct ubd *ubd_dev = &ubd_devs[0];
1069
b8831a1d 1070 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1071 if(ubd_dev->file == NULL)
1072 ubd_dev->file = "root_fs";
b8831a1d
JD
1073 mutex_unlock(&ubd_lock);
1074
dc764e50 1075 return 0;
d8d7c28e
PBG
1076}
1077
1078__initcall(ubd0_init);
1079
b8831a1d 1080/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1081static struct platform_driver ubd_driver = {
1082 .driver = {
1083 .name = DRIVER_NAME,
1084 },
1da177e4
LT
1085};
1086
d8d7c28e 1087static int __init ubd_init(void)
1da177e4 1088{
f28169d2
JD
1089 char *error;
1090 int i, err;
1da177e4 1091
792dd4fc 1092 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1093 return -1;
1094
6da2ec56
KC
1095 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1096 sizeof(struct io_thread_req *),
1097 GFP_KERNEL
f88f0bdf
AI
1098 );
1099 irq_remainder = 0;
1100
1101 if (irq_req_buffer == NULL) {
1102 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1103 return -1;
1104 }
6da2ec56
KC
1105 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1106 sizeof(struct io_thread_req *),
1107 GFP_KERNEL
f88f0bdf
AI
1108 );
1109
1110 io_remainder = 0;
1111
1112 if (io_req_buffer == NULL) {
1113 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1114 return -1;
1115 }
3ae5eaec 1116 platform_driver_register(&ubd_driver);
dc764e50 1117 mutex_lock(&ubd_lock);
f28169d2
JD
1118 for (i = 0; i < MAX_DEV; i++){
1119 err = ubd_add(i, &error);
1120 if(err)
1121 printk(KERN_ERR "Failed to initialize ubd device %d :"
1122 "%s\n", i, error);
1123 }
dc764e50 1124 mutex_unlock(&ubd_lock);
1da177e4
LT
1125 return 0;
1126}
1127
1128late_initcall(ubd_init);
1129
d8d7c28e 1130static int __init ubd_driver_init(void){
91acb21f
JD
1131 unsigned long stack;
1132 int err;
1133
1134 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1135 if(global_openflags.s){
1136 printk(KERN_INFO "ubd: Synchronous mode\n");
1137 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1138 * enough. So use anyway the io thread. */
1139 }
2fcb4090 1140 stack = alloc_stack(0, 0);
558f9b2f 1141 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
91acb21f 1142 if(io_pid < 0){
6c29256c 1143 printk(KERN_ERR
91acb21f
JD
1144 "ubd : Failed to start I/O thread (errno = %d) - "
1145 "falling back to synchronous I/O\n", -io_pid);
1146 io_pid = -1;
dc764e50 1147 return 0;
91acb21f 1148 }
6c29256c 1149 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1150 0, "ubd", ubd_devs);
36d46a59 1151 if(err < 0)
91acb21f 1152 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1153 return 0;
91acb21f
JD
1154}
1155
1156device_initcall(ubd_driver_init);
1157
05bdb996 1158static int ubd_open(struct gendisk *disk, blk_mode_t mode)
1da177e4 1159{
7d314e34 1160 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
1161 int err = 0;
1162
9a181c58 1163 mutex_lock(&ubd_mutex);
7d314e34
PBG
1164 if(ubd_dev->count == 0){
1165 err = ubd_open_dev(ubd_dev);
1da177e4
LT
1166 if(err){
1167 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 1168 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
1169 goto out;
1170 }
1171 }
7d314e34
PBG
1172 ubd_dev->count++;
1173 set_disk_ro(disk, !ubd_dev->openflags.w);
6e9624b8 1174out:
9a181c58 1175 mutex_unlock(&ubd_mutex);
dc764e50 1176 return err;
1da177e4
LT
1177}
1178
ae220766 1179static void ubd_release(struct gendisk *disk)
1da177e4 1180{
7d314e34 1181 struct ubd *ubd_dev = disk->private_data;
1da177e4 1182
9a181c58 1183 mutex_lock(&ubd_mutex);
7d314e34 1184 if(--ubd_dev->count == 0)
5f75a4f8 1185 ubd_close_dev(ubd_dev);
9a181c58 1186 mutex_unlock(&ubd_mutex);
1da177e4
LT
1187}
1188
91acb21f
JD
1189static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1190 __u64 *cow_offset, unsigned long *bitmap,
1191 __u64 bitmap_offset, unsigned long *bitmap_words,
1192 __u64 bitmap_len)
1da177e4 1193{
a43c8316 1194 __u64 sector = io_offset >> SECTOR_SHIFT;
91acb21f
JD
1195 int i, update_bitmap = 0;
1196
a43c8316 1197 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
91acb21f
JD
1198 if(cow_mask != NULL)
1199 ubd_set_bit(i, (unsigned char *) cow_mask);
1200 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1201 continue;
1da177e4 1202
91acb21f
JD
1203 update_bitmap = 1;
1204 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1205 }
1206
1207 if(!update_bitmap)
1208 return;
1da177e4 1209
91acb21f 1210 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1211
91acb21f
JD
1212 /* This takes care of the case where we're exactly at the end of the
1213 * device, and *cow_offset + 1 is off the end. So, just back it up
1214 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1215 * for the original diagnosis.
1216 */
6d074242
JO
1217 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1218 sizeof(unsigned long)) - 1))
91acb21f
JD
1219 (*cow_offset)--;
1220
1221 bitmap_words[0] = bitmap[*cow_offset];
1222 bitmap_words[1] = bitmap[*cow_offset + 1];
1223
1224 *cow_offset *= sizeof(unsigned long);
1225 *cow_offset += bitmap_offset;
1226}
1227
fc6b6a87
GKB
1228static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1229 unsigned long offset, unsigned long *bitmap,
91acb21f
JD
1230 __u64 bitmap_offset, __u64 bitmap_len)
1231{
fc6b6a87 1232 __u64 sector = offset >> SECTOR_SHIFT;
91acb21f
JD
1233 int i;
1234
fc6b6a87 1235 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
91acb21f
JD
1236 panic("Operation too long");
1237
a43c8316 1238 if (req_op(req->req) == REQ_OP_READ) {
fc6b6a87 1239 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
91acb21f 1240 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1241 ubd_set_bit(i, (unsigned char *)
fc6b6a87
GKB
1242 &segment->sector_mask);
1243 }
1244 } else {
1245 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1246 &segment->cow_offset, bitmap, bitmap_offset,
1247 segment->bitmap_words, bitmap_len);
1248 }
1249}
1250
1251static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1252 struct request *req)
1253{
1254 struct bio_vec bvec;
1255 struct req_iterator iter;
1256 int i = 0;
1257 unsigned long byte_offset = io_req->offset;
7ee1de6e 1258 enum req_op op = req_op(req);
fc6b6a87
GKB
1259
1260 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1261 io_req->io_desc[0].buffer = NULL;
1262 io_req->io_desc[0].length = blk_rq_bytes(req);
1263 } else {
1264 rq_for_each_segment(bvec, req, iter) {
1265 BUG_ON(i >= io_req->desc_cnt);
1266
25d84545 1267 io_req->io_desc[i].buffer = bvec_virt(&bvec);
fc6b6a87
GKB
1268 io_req->io_desc[i].length = bvec.bv_len;
1269 i++;
1270 }
1271 }
1272
1273 if (dev->cow.file) {
1274 for (i = 0; i < io_req->desc_cnt; i++) {
1275 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1276 dev->cow.bitmap, dev->cow.bitmap_offset,
1277 dev->cow.bitmap_len);
1278 byte_offset += io_req->io_desc[i].length;
dc764e50 1279 }
fc6b6a87 1280
91acb21f 1281 }
1da177e4
LT
1282}
1283
fc6b6a87
GKB
1284static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1285 int desc_cnt)
1da177e4 1286{
ecb0a83e 1287 struct io_thread_req *io_req;
fc6b6a87 1288 int i;
1da177e4 1289
fc6b6a87
GKB
1290 io_req = kmalloc(sizeof(*io_req) +
1291 (desc_cnt * sizeof(struct io_desc)),
1292 GFP_ATOMIC);
ecb0a83e 1293 if (!io_req)
fc6b6a87 1294 return NULL;
805f11a0
RW
1295
1296 io_req->req = req;
ecb0a83e
CH
1297 if (dev->cow.file)
1298 io_req->fds[0] = dev->cow.fd;
1299 else
1300 io_req->fds[0] = dev->fd;
0033dfd9 1301 io_req->error = 0;
53766def
AI
1302 io_req->sectorsize = SECTOR_SIZE;
1303 io_req->fds[1] = dev->fd;
fc6b6a87 1304 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
53766def
AI
1305 io_req->offsets[0] = 0;
1306 io_req->offsets[1] = dev->cow.data_offset;
1307
fc6b6a87
GKB
1308 for (i = 0 ; i < desc_cnt; i++) {
1309 io_req->io_desc[i].sector_mask = 0;
1310 io_req->io_desc[i].cow_offset = -1;
1311 }
1312
1313 return io_req;
1314}
1315
1316static int ubd_submit_request(struct ubd *dev, struct request *req)
1317{
1318 int segs = 0;
1319 struct io_thread_req *io_req;
1320 int ret;
7ee1de6e 1321 enum req_op op = req_op(req);
fc6b6a87
GKB
1322
1323 if (op == REQ_OP_FLUSH)
1324 segs = 0;
1325 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1326 segs = 1;
1327 else
1328 segs = blk_rq_nr_phys_segments(req);
1329
1330 io_req = ubd_alloc_req(dev, req, segs);
1331 if (!io_req)
1332 return -ENOMEM;
1333
1334 io_req->desc_cnt = segs;
1335 if (segs)
1336 ubd_map_req(dev, io_req, req);
53766def 1337
ecb0a83e
CH
1338 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1339 if (ret != sizeof(io_req)) {
1340 if (ret != -EAGAIN)
1341 pr_err("write to io thread failed: %d\n", -ret);
bc1d72e7 1342 kfree(io_req);
bc1d72e7 1343 }
ecb0a83e 1344 return ret;
bc1d72e7
RW
1345}
1346
4e6da0fe
RW
1347static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1348 const struct blk_mq_queue_data *bd)
1da177e4 1349{
6961cd4d 1350 struct ubd *ubd_dev = hctx->queue->queuedata;
4e6da0fe 1351 struct request *req = bd->rq;
53766def 1352 int ret = 0, res = BLK_STS_OK;
a0044bdf 1353
4e6da0fe
RW
1354 blk_mq_start_request(req);
1355
6961cd4d
JA
1356 spin_lock_irq(&ubd_dev->lock);
1357
53766def 1358 switch (req_op(req)) {
53766def 1359 case REQ_OP_FLUSH:
53766def
AI
1360 case REQ_OP_READ:
1361 case REQ_OP_WRITE:
50109b5a
AI
1362 case REQ_OP_DISCARD:
1363 case REQ_OP_WRITE_ZEROES:
fc6b6a87 1364 ret = ubd_submit_request(ubd_dev, req);
50109b5a 1365 break;
53766def
AI
1366 default:
1367 WARN_ON_ONCE(1);
1368 res = BLK_STS_NOTSUPP;
4e6da0fe 1369 }
53766def 1370
6961cd4d
JA
1371 spin_unlock_irq(&ubd_dev->lock);
1372
d848074b
AI
1373 if (ret < 0) {
1374 if (ret == -ENOMEM)
1375 res = BLK_STS_RESOURCE;
1376 else
1377 res = BLK_STS_DEV_RESOURCE;
1378 }
6961cd4d 1379
53766def 1380 return res;
1da177e4
LT
1381}
1382
a885c8c4
CH
1383static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1384{
7d314e34 1385 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1386
1387 geo->heads = 128;
1388 geo->sectors = 32;
7d314e34 1389 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1390 return 0;
1391}
1392
05bdb996 1393static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4
LT
1394 unsigned int cmd, unsigned long arg)
1395{
a625c998 1396 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1397 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1398
1399 switch (cmd) {
1da177e4 1400 struct cdrom_volctrl volume;
1da177e4 1401 case HDIO_GET_IDENTITY:
73855e13
BZ
1402 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1403 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1404 ubd_id[ATA_ID_HEADS] = 128;
1405 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1406 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1407 sizeof(ubd_id)))
dc764e50
JD
1408 return -EFAULT;
1409 return 0;
b8831a1d 1410
1da177e4
LT
1411 case CDROMVOLREAD:
1412 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1413 return -EFAULT;
1da177e4
LT
1414 volume.channel0 = 255;
1415 volume.channel1 = 255;
1416 volume.channel2 = 255;
1417 volume.channel3 = 255;
1418 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1419 return -EFAULT;
1420 return 0;
1da177e4 1421 }
dc764e50 1422 return -EINVAL;
1da177e4
LT
1423}
1424
a43c8316
AI
1425static int map_error(int error_code)
1426{
1427 switch (error_code) {
1428 case 0:
1429 return BLK_STS_OK;
1430 case ENOSYS:
1431 case EOPNOTSUPP:
1432 return BLK_STS_NOTSUPP;
1433 case ENOSPC:
1434 return BLK_STS_NOSPC;
1435 }
1436 return BLK_STS_IOERR;
1437}
1438
a41421ed
AI
1439/*
1440 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1441 *
1442 * The following functions are part of UML hypervisor code.
1443 * All functions from here onwards are executed as a helper
1444 * thread and are not allowed to execute any kernel functions.
1445 *
1446 * Any communication must occur strictly via shared memory and IPC.
1447 *
1448 * Do not add printks, locks, kernel memory operations, etc - it
1449 * will result in unpredictable behaviour and/or crashes.
1450 */
1451
fc6b6a87 1452static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1da177e4 1453{
91acb21f 1454 int n;
1da177e4 1455
fc6b6a87 1456 if (segment->cow_offset == -1)
a43c8316 1457 return map_error(0);
1da177e4 1458
fc6b6a87
GKB
1459 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1460 sizeof(segment->bitmap_words), segment->cow_offset);
1461 if (n != sizeof(segment->bitmap_words))
a43c8316 1462 return map_error(-n);
1da177e4 1463
a43c8316 1464 return map_error(0);
91acb21f 1465}
1da177e4 1466
fc6b6a87 1467static void do_io(struct io_thread_req *req, struct io_desc *desc)
91acb21f 1468{
50109b5a 1469 char *buf = NULL;
91acb21f
JD
1470 unsigned long len;
1471 int n, nsectors, start, end, bit;
91acb21f
JD
1472 __u64 off;
1473
50109b5a
AI
1474 /* FLUSH is really a special case, we cannot "case" it with others */
1475
a43c8316 1476 if (req_op(req->req) == REQ_OP_FLUSH) {
805f11a0 1477 /* fds[0] is always either the rw image or our cow file */
a41421ed 1478 req->error = map_error(-os_sync_file(req->fds[0]));
805f11a0
RW
1479 return;
1480 }
1481
fc6b6a87 1482 nsectors = desc->length / req->sectorsize;
91acb21f
JD
1483 start = 0;
1484 do {
fc6b6a87 1485 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
91acb21f
JD
1486 end = start;
1487 while((end < nsectors) &&
fc6b6a87 1488 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
91acb21f
JD
1489 end++;
1490
1491 off = req->offset + req->offsets[bit] +
1492 start * req->sectorsize;
1493 len = (end - start) * req->sectorsize;
fc6b6a87
GKB
1494 if (desc->buffer != NULL)
1495 buf = &desc->buffer[start * req->sectorsize];
91acb21f 1496
50109b5a
AI
1497 switch (req_op(req->req)) {
1498 case REQ_OP_READ:
91acb21f
JD
1499 n = 0;
1500 do {
1501 buf = &buf[n];
1502 len -= n;
8c6157b6 1503 n = os_pread_file(req->fds[bit], buf, len, off);
50109b5a 1504 if (n < 0) {
a43c8316 1505 req->error = map_error(-n);
91acb21f
JD
1506 return;
1507 }
1508 } while((n < len) && (n != 0));
1509 if (n < len) memset(&buf[n], 0, len - n);
50109b5a
AI
1510 break;
1511 case REQ_OP_WRITE:
8c6157b6 1512 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f 1513 if(n != len){
a43c8316 1514 req->error = map_error(-n);
91acb21f
JD
1515 return;
1516 }
50109b5a
AI
1517 break;
1518 case REQ_OP_DISCARD:
50109b5a
AI
1519 n = os_falloc_punch(req->fds[bit], off, len);
1520 if (n) {
1521 req->error = map_error(-n);
1522 return;
1523 }
1524 break;
d2a0a616
FD
1525 case REQ_OP_WRITE_ZEROES:
1526 n = os_falloc_zeroes(req->fds[bit], off, len);
1527 if (n) {
1528 req->error = map_error(-n);
1529 return;
1530 }
1531 break;
50109b5a
AI
1532 default:
1533 WARN_ON_ONCE(1);
1534 req->error = BLK_STS_NOTSUPP;
1535 return;
91acb21f
JD
1536 }
1537
1538 start = end;
1539 } while(start < nsectors);
1da177e4 1540
fc6b6a87
GKB
1541 req->offset += len;
1542 req->error = update_bitmap(req, desc);
1da177e4 1543}
91acb21f
JD
1544
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
1548int kernel_fd = -1;
1549
d8d7c28e 1550/* Only changed by the io thread. XXX: currently unused. */
4dc5a328 1551static int io_count;
91acb21f
JD
1552
1553int io_thread(void *arg)
1554{
f88f0bdf 1555 int n, count, written, res;
91acb21f 1556
91d44ff8
RW
1557 os_fix_helper_signals();
1558
91acb21f 1559 while(1){
f88f0bdf
AI
1560 n = bulk_req_safe_read(
1561 kernel_fd,
1562 io_req_buffer,
1563 &io_remainder,
1564 &io_remainder_size,
1565 UBD_REQ_BUFFER_SIZE
1566 );
e355b2f5
GKB
1567 if (n <= 0) {
1568 if (n == -EAGAIN)
f88f0bdf 1569 ubd_read_poll(-1);
e355b2f5
GKB
1570
1571 continue;
91acb21f 1572 }
f88f0bdf
AI
1573
1574 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
fc6b6a87
GKB
1575 struct io_thread_req *req = (*io_req_buffer)[count];
1576 int i;
1577
f88f0bdf 1578 io_count++;
fc6b6a87
GKB
1579 for (i = 0; !req->error && i < req->desc_cnt; i++)
1580 do_io(req, &(req->io_desc[i]));
1581
f88f0bdf
AI
1582 }
1583
1584 written = 0;
1585
1586 do {
6e682d53
GKB
1587 res = os_write_file(kernel_fd,
1588 ((char *) io_req_buffer) + written,
1589 n - written);
ff6a1798 1590 if (res >= 0) {
f88f0bdf 1591 written += res;
f88f0bdf
AI
1592 }
1593 if (written < n) {
1594 ubd_write_poll(-1);
1595 }
1596 } while (written < n);
91acb21f 1597 }
91acb21f 1598
1b57e9c2
JD
1599 return 0;
1600}