Merge tag 'soc-drivers-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
[linux-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Cambridge Greys Ltd
 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 */
7
/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN. This version also has lots of
 * clean ups preparing for ubd-many.
 * James McMechan
 */
21
1da177e4
LT
22#define UBD_SHIFT 4
23
8ea3c06a
AV
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
4e6da0fe 27#include <linux/blk-mq.h>
8ea3c06a
AV
28#include <linux/ata.h>
29#include <linux/hdreg.h>
b81e0c23 30#include <linux/major.h>
8ea3c06a
AV
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
39#include <asm/tlbflush.h>
37185b33 40#include <kern_util.h>
1da177e4 41#include "mconsole_kern.h"
37185b33
AV
42#include <init.h>
43#include <irq_kern.h>
8ea3c06a 44#include "ubd.h"
37185b33 45#include <os.h>
1da177e4
LT
46#include "cow.h"
47
a43c8316
AI
48/* Max request size is determined by sector mask - 32K */
49#define UBD_MAX_REQUEST (8 * sizeof(long))
1da177e4 50
fc6b6a87
GKB
/*
 * Per-segment part of a request: one buffer and its length, plus the COW
 * bookkeeping (sector mask, bitmap words and the offset they belong to).
 * NOTE(review): the code that fills in and consumes these fields is not
 * visible in this part of the file - confirm the exact field semantics there.
 */
struct io_desc {
	char *buffer;
	unsigned long length;
	unsigned long sector_mask;
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
};

/*
 * One request in flight. Pointers to these objects are read back by
 * ubd_handler(), which completes @req with @error and then frees the object.
 */
struct io_thread_req {
	struct request *req;		/* block layer request being served */
	int fds[2];
	unsigned long offsets[2];
	unsigned long long offset;
	int sectorsize;
	int error;			/* status handed to blk_mq_end_request() */

	int desc_cnt;			/* presumably the number of io_desc[] entries - TODO confirm */
	/* io_desc has to be the last element of the struct */
	struct io_desc io_desc[];
};
71
f88f0bdf
AI
72
/*
 * Arrays of request pointers, allocated in ubd_init(). Each array comes with
 * a "remainder" slot and byte count that bulk_req_safe_read() uses to carry a
 * partially read pointer over to the next read.
 *
 * irq_*: consumed by ubd_handler().
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/* io_*: NOTE(review): the consumer is outside this view, presumably the I/O thread. */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
80
81
82
/*
 * Return 1 if bit number @bit is set in the byte array @data, 0 otherwise.
 * Bits are numbered from the least significant bit of data[0] upwards.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;	/* bits per array element */
	__u64 idx = bit / width;
	int shift = bit % width;

	return (data[idx] >> shift) & 1;
}
93
/*
 * Set bit number @bit in the byte array @data; uses the same numbering as
 * ubd_test_bit() (least significant bit of data[0] is bit 0).
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;	/* bits per array element */
	__u64 idx = bit / width;
	int shift = bit % width;

	data[idx] = data[idx] | (1 << shift);
}
104/*End stuff from ubd_user.h*/
105
106#define DRIVER_NAME "uml-blkdev"
107
d7fb2c38 108static DEFINE_MUTEX(ubd_lock);
9a181c58 109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
1da177e4 110
05bdb996 111static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4 112 unsigned int cmd, unsigned long arg);
a885c8c4 113static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 114
97d88ac8 115#define MAX_DEV (16)
1da177e4 116
83d5cde4 117static const struct block_device_operations ubd_blops = {
1da177e4 118 .owner = THIS_MODULE,
a625c998 119 .ioctl = ubd_ioctl,
ab0cf1e4 120 .compat_ioctl = blkdev_compat_ptr_ioctl,
a885c8c4 121 .getgeo = ubd_getgeo,
1da177e4
LT
122};
123
1da177e4
LT
124#ifdef CONFIG_BLK_DEV_UBD_SYNC
125#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
126 .cl = 1 })
127#else
128#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
129 .cl = 1 })
130#endif
1da177e4
LT
131static struct openflags global_openflags = OPEN_FLAGS;
132
/* State of the backing file that sits underneath a COW file. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc'ed in ubd_open_dev(), freed in ubd_close_dev() */
	unsigned long bitmap_len;	/* size passed to vmalloc() and to the bitmap read */
	int bitmap_offset;		/* file offset the bitmap is read from */
	int data_offset;
};
143
a0044bdf
JD
144#define MAX_SG 64
145
1da177e4 146struct ubd {
2a9d32f6
PBG
147 /* name (and fd, below) of the file opened for writing, either the
148 * backing or the cow file. */
1da177e4 149 char *file;
ef3ba87c 150 char *serial;
1da177e4
LT
151 int fd;
152 __u64 size;
153 struct openflags boot_openflags;
154 struct openflags openflags;
84e945e3
PBG
155 unsigned shared:1;
156 unsigned no_cow:1;
50109b5a 157 unsigned no_trim:1;
1da177e4
LT
158 struct cow cow;
159 struct platform_device pdev;
32621ad7 160 struct gendisk *disk;
4e6da0fe 161 struct blk_mq_tag_set tag_set;
62f96cb0 162 spinlock_t lock;
4e6da0fe
RW
163};
164
1da177e4
LT
165#define DEFAULT_COW { \
166 .file = NULL, \
dc764e50
JD
167 .fd = -1, \
168 .bitmap = NULL, \
1da177e4 169 .bitmap_offset = 0, \
dc764e50 170 .data_offset = 0, \
1da177e4
LT
171}
172
173#define DEFAULT_UBD { \
174 .file = NULL, \
ef3ba87c 175 .serial = NULL, \
1da177e4
LT
176 .fd = -1, \
177 .size = -1, \
178 .boot_openflags = OPEN_FLAGS, \
179 .openflags = OPEN_FLAGS, \
dc764e50 180 .no_cow = 0, \
50109b5a 181 .no_trim = 0, \
6c29256c 182 .shared = 0, \
dc764e50 183 .cow = DEFAULT_COW, \
22e65004 184 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
1da177e4
LT
185}
186
b8831a1d 187/* Protected by ubd_lock */
5dc62b1b 188static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 189
4e6da0fe
RW
190static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
191 const struct blk_mq_queue_data *bd);
4e6da0fe 192
1da177e4
LT
193static int fake_ide_setup(char *str)
194{
7eb90f7e 195 pr_warn("The fake_ide option has been removed\n");
dc764e50 196 return 1;
1da177e4 197}
1da177e4
LT
198__setup("fake_ide", fake_ide_setup);
199
200__uml_help(fake_ide_setup,
201"fake_ide\n"
7eb90f7e 202" Obsolete stub.\n\n"
1da177e4
LT
203);
204
205static int parse_unit(char **ptr)
206{
207 char *str = *ptr, *end;
208 int n = -1;
209
210 if(isdigit(*str)) {
211 n = simple_strtoul(str, &end, 0);
212 if(end == str)
dc764e50 213 return -1;
1da177e4
LT
214 *ptr = end;
215 }
97d88ac8 216 else if (('a' <= *str) && (*str <= 'z')) {
1da177e4
LT
217 n = *str - 'a';
218 str++;
219 *ptr = str;
220 }
dc764e50 221 return n;
1da177e4
LT
222}
223
d8d7c28e
PBG
224/* If *index_out == -1 at exit, the passed option was a general one;
225 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
226 * should not be freed on exit.
227 */
f28169d2 228static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 229{
7d314e34 230 struct ubd *ubd_dev;
1da177e4 231 struct openflags flags = global_openflags;
ef3ba87c 232 char *file, *backing_file, *serial;
b8831a1d 233 int n, err = 0, i;
1da177e4
LT
234
235 if(index_out) *index_out = -1;
236 n = *str;
237 if(n == '='){
1da177e4 238 str++;
1da177e4
LT
239 if(!strcmp(str, "sync")){
240 global_openflags = of_sync(global_openflags);
9ca55299 241 return err;
1da177e4 242 }
b8831a1d 243
7eb90f7e
CH
244 pr_warn("fake major not supported any more\n");
245 return 0;
1da177e4
LT
246 }
247
248 n = parse_unit(&str);
249 if(n < 0){
f28169d2
JD
250 *error_out = "Couldn't parse device number";
251 return -EINVAL;
1da177e4
LT
252 }
253 if(n >= MAX_DEV){
f28169d2
JD
254 *error_out = "Device number out of range";
255 return 1;
1da177e4
LT
256 }
257
f28169d2 258 err = -EBUSY;
d7fb2c38 259 mutex_lock(&ubd_lock);
1da177e4 260
7d314e34
PBG
261 ubd_dev = &ubd_devs[n];
262 if(ubd_dev->file != NULL){
f28169d2 263 *error_out = "Device is already configured";
1da177e4
LT
264 goto out;
265 }
266
267 if (index_out)
268 *index_out = n;
269
f28169d2 270 err = -EINVAL;
50109b5a 271 for (i = 0; i < sizeof("rscdt="); i++) {
1da177e4
LT
272 switch (*str) {
273 case 'r':
274 flags.w = 0;
275 break;
276 case 's':
277 flags.s = 1;
278 break;
279 case 'd':
7d314e34 280 ubd_dev->no_cow = 1;
1da177e4 281 break;
6c29256c 282 case 'c':
7d314e34 283 ubd_dev->shared = 1;
6c29256c 284 break;
50109b5a
AI
285 case 't':
286 ubd_dev->no_trim = 1;
287 break;
1da177e4
LT
288 case '=':
289 str++;
290 goto break_loop;
291 default:
f28169d2 292 *error_out = "Expected '=' or flag letter "
50109b5a 293 "(r, s, c, t or d)";
1da177e4
LT
294 goto out;
295 }
296 str++;
297 }
298
f28169d2
JD
299 if (*str == '=')
300 *error_out = "Too many flags specified";
301 else
302 *error_out = "Missing '='";
1da177e4
LT
303 goto out;
304
305break_loop:
ef3ba87c
CO
306 file = strsep(&str, ",:");
307 if (*file == '\0')
308 file = NULL;
1da177e4 309
ef3ba87c 310 backing_file = strsep(&str, ",:");
94c41b3a 311 if (backing_file && *backing_file == '\0')
ef3ba87c 312 backing_file = NULL;
1da177e4 313
ef3ba87c 314 serial = strsep(&str, ",:");
94c41b3a 315 if (serial && *serial == '\0')
ef3ba87c
CO
316 serial = NULL;
317
318 if (backing_file && ubd_dev->no_cow) {
319 *error_out = "Can't specify both 'd' and a cow file";
320 goto out;
1da177e4 321 }
ef3ba87c 322
f28169d2 323 err = 0;
ef3ba87c 324 ubd_dev->file = file;
7d314e34 325 ubd_dev->cow.file = backing_file;
ef3ba87c 326 ubd_dev->serial = serial;
7d314e34 327 ubd_dev->boot_openflags = flags;
1da177e4 328out:
d7fb2c38 329 mutex_unlock(&ubd_lock);
f28169d2 330 return err;
1da177e4
LT
331}
332
333static int ubd_setup(char *str)
334{
f28169d2
JD
335 char *error;
336 int err;
337
338 err = ubd_setup_common(str, NULL, &error);
339 if(err)
340 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
341 "%s\n", str, error);
342 return 1;
1da177e4
LT
343}
344
345__setup("ubd", ubd_setup);
346__uml_help(ubd_setup,
ef3ba87c 347"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
1da177e4
LT
348" This is used to associate a device with a file in the underlying\n"
349" filesystem. When specifying two filenames, the first one is the\n"
350" COW name and the second is the backing file name. As separator you can\n"
351" use either a ':' or a ',': the first one allows writing things like;\n"
352" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
353" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 354" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
355" a COW file or a backing file. To override this detection, add the 'd'\n"
356" flag:\n"
357" ubd0d=BackingFile\n"
358" Usually, there is a filesystem in the file, but \n"
359" that's not required. Swap devices containing swap files can be\n"
360" specified like this. Also, a file which doesn't contain a\n"
361" filesystem can have its contents read in the virtual \n"
362" machine by running 'dd' on the device. <n> must be in the range\n"
363" 0 to 7. Appending an 'r' to the number will cause that device\n"
364" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
365" an 's' will cause data to be written to disk on the host immediately.\n"
366" 'c' will cause the device to be treated as being shared between multiple\n"
367" UMLs and file locking will be turned off - this is appropriate for a\n"
368" cluster filesystem and inappropriate at almost all other times.\n\n"
50109b5a 369" 't' will disable trim/discard support on the device (enabled by default).\n\n"
ef3ba87c
CO
370" An optional device serial number can be exposed using the serial parameter\n"
371" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
372" useful when a unique number should be given to the device. Note when\n"
373" specifying a label, the filename2 must be also presented. It can be\n"
374" an empty string, in which case the backing file is not used:\n"
375" ubd0=File,,Serial\n"
1da177e4
LT
376);
377
8299ca5c 378static int udb_setup(char *str)
1da177e4
LT
379{
380 printk("udb%s specified on command line is almost certainly a ubd -> "
381 "udb TYPO\n", str);
dc764e50 382 return 1;
1da177e4
LT
383}
384
385__setup("udb", udb_setup);
386__uml_help(udb_setup,
387"udb\n"
0894e27e
JD
388" This option is here solely to catch ubd -> udb typos, which can be\n"
389" to impossible to catch visually unless you specifically look for\n"
390" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
391" in the boot output.\n\n"
392);
393
91acb21f 394/* Only changed by ubd_init, which is an initcall. */
5dc62b1b 395static int thread_fd = -1;
a0044bdf 396
f88f0bdf
AI
/*
 * Read several request pointers at a time from @fd into @request_buffer,
 * handling fractional reads if (and as) needed.
 *
 * @remainder is a pointer-sized scratch slot and @remainder_size the number
 * of bytes currently parked in it: when a read ends in the middle of a
 * pointer, the trailing bytes are saved there and put back at the start of
 * the buffer on the next call.
 *
 * Returns the number of BYTES of complete pointers now in @request_buffer
 * (a multiple of the pointer size, possibly 0), or the non-positive result
 * of os_read_file() if nothing could be read. In the latter case a pending
 * remainder is kept for the next call.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const size_t rec_size = sizeof(struct io_thread_req *);
	char *buf = (char *) request_buffer;
	int n = 0;
	int res;

	/* Put back the partial pointer left over from the previous read. */
	if (*remainder_size > 0) {
		memmove(buf, (char *) remainder, *remainder_size);
		n = *remainder_size;
	}

	res = os_read_file(fd, buf + n, rec_size * max_recs - n);
	if (res <= 0)
		return res;

	n += res;

	/*
	 * Always recompute the remainder. Once the saved bytes have been
	 * consumed the count must drop back to 0, otherwise the next call
	 * would prepend the same stale bytes again and corrupt every pointer
	 * that follows.
	 */
	*remainder_size = n % rec_size;
	if (*remainder_size > 0) {
		/*
		 * Read somehow returned not a multiple of dword
		 * theoretically possible, but never observed in the
		 * wild, so read routine must be able to handle it
		 */
		WARN(1, "UBD IPC read returned a partial result");
		n -= *remainder_size;
		memmove(remainder, buf + n, *remainder_size);
	}
	return n;
}
449
62f96cb0 450/* Called without dev->lock held, and only in interrupt context. */
91acb21f 451static void ubd_handler(void)
1da177e4 452{
91acb21f 453 int n;
f88f0bdf 454 int count;
91acb21f 455
a0044bdf 456 while(1){
f88f0bdf
AI
457 n = bulk_req_safe_read(
458 thread_fd,
459 irq_req_buffer,
460 &irq_remainder,
461 &irq_remainder_size,
462 UBD_REQ_BUFFER_SIZE
463 );
464 if (n < 0) {
a0044bdf
JD
465 if(n == -EAGAIN)
466 break;
467 printk(KERN_ERR "spurious interrupt in ubd_handler, "
468 "err = %d\n", -n);
469 return;
470 }
f88f0bdf 471 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
4e6da0fe 472 struct io_thread_req *io_req = (*irq_req_buffer)[count];
4e6da0fe 473
50109b5a
AI
474 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
475 blk_queue_max_discard_sectors(io_req->req->q, 0);
476 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
50109b5a 477 }
fc6b6a87 478 blk_mq_end_request(io_req->req, io_req->error);
4e6da0fe 479 kfree(io_req);
f88f0bdf 480 }
a0044bdf 481 }
1da177e4
LT
482}
483
7bea96fd 484static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 485{
91acb21f 486 ubd_handler();
dc764e50 487 return IRQ_HANDLED;
91acb21f 488}
09ace81c 489
91acb21f
JD
490/* Only changed by ubd_init, which is an initcall. */
491static int io_pid = -1;
09ace81c 492
5dc62b1b 493static void kill_io_thread(void)
91acb21f 494{
6c29256c 495 if(io_pid != -1)
91acb21f 496 os_kill_process(io_pid, 1);
09ace81c 497}
1da177e4 498
91acb21f
JD
499__uml_exitcall(kill_io_thread);
500
d8d7c28e 501static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
502{
503 char *file;
85356398
RW
504 int fd;
505 int err;
506
507 __u32 version;
508 __u32 align;
509 char *backing_file;
853bc0ab 510 time64_t mtime;
85356398
RW
511 unsigned long long size;
512 int sector_size;
513 int bitmap_offset;
514
515 if (ubd_dev->file && ubd_dev->cow.file) {
516 file = ubd_dev->cow.file;
517
518 goto out;
519 }
520
d4afcba9 521 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
522 if (fd < 0)
523 return fd;
524
525 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
526 &mtime, &size, &sector_size, &align, &bitmap_offset);
527 os_close_file(fd);
1da177e4 528
85356398
RW
529 if(err == -EINVAL)
530 file = ubd_dev->file;
531 else
532 file = backing_file;
533
534out:
dc764e50 535 return os_file_size(file, size_out);
1da177e4
LT
536}
537
5dc62b1b
WC
/*
 * Read @len bytes of COW bitmap from @fd at file offset @offset into @buf.
 * Returns 0 on success or the negative value reported by os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
548
853bc0ab 549static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
5dc62b1b 550{
853bc0ab 551 time64_t modtime;
5dc62b1b
WC
552 unsigned long long actual;
553 int err;
554
555 err = os_file_modtime(file, &modtime);
556 if (err < 0) {
557 printk(KERN_ERR "Failed to get modification time of backing "
558 "file \"%s\", err = %d\n", file, -err);
559 return err;
560 }
561
562 err = os_file_size(file, &actual);
563 if (err < 0) {
564 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
565 "err = %d\n", file, -err);
566 return err;
567 }
568
569 if (actual != size) {
570 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
571 * the typecast.*/
572 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
573 "vs backing file\n", (unsigned long long) size, actual);
574 return -EINVAL;
575 }
576 if (modtime != mtime) {
853bc0ab 577 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
5dc62b1b
WC
578 "backing file\n", mtime, modtime);
579 return -EINVAL;
580 }
581 return 0;
582}
583
584static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
585{
586 struct uml_stat buf1, buf2;
587 int err;
588
589 if (from_cmdline == NULL)
590 return 0;
591 if (!strcmp(from_cmdline, from_cow))
592 return 0;
593
594 err = os_stat_file(from_cmdline, &buf1);
595 if (err < 0) {
596 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
597 -err);
598 return 0;
599 }
600 err = os_stat_file(from_cow, &buf2);
601 if (err < 0) {
602 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
603 -err);
604 return 1;
605 }
606 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
607 return 0;
608
609 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
610 "\"%s\" specified in COW header of \"%s\"\n",
611 from_cmdline, from_cow, cow);
612 return 1;
613}
614
615static int open_ubd_file(char *file, struct openflags *openflags, int shared,
616 char **backing_file_out, int *bitmap_offset_out,
617 unsigned long *bitmap_len_out, int *data_offset_out,
618 int *create_cow_out)
619{
853bc0ab 620 time64_t mtime;
5dc62b1b
WC
621 unsigned long long size;
622 __u32 version, align;
623 char *backing_file;
624 int fd, err, sectorsize, asked_switch, mode = 0644;
625
626 fd = os_open_file(file, *openflags, mode);
627 if (fd < 0) {
628 if ((fd == -ENOENT) && (create_cow_out != NULL))
629 *create_cow_out = 1;
630 if (!openflags->w ||
631 ((fd != -EROFS) && (fd != -EACCES)))
632 return fd;
633 openflags->w = 0;
634 fd = os_open_file(file, *openflags, mode);
635 if (fd < 0)
636 return fd;
637 }
638
639 if (shared)
640 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
641 else {
642 err = os_lock_file(fd, openflags->w);
643 if (err < 0) {
644 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
645 file, -err);
646 goto out_close;
647 }
648 }
649
650 /* Successful return case! */
651 if (backing_file_out == NULL)
652 return fd;
653
654 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
655 &size, &sectorsize, &align, bitmap_offset_out);
656 if (err && (*backing_file_out != NULL)) {
657 printk(KERN_ERR "Failed to read COW header from COW file "
658 "\"%s\", errno = %d\n", file, -err);
659 goto out_close;
660 }
661 if (err)
662 return fd;
663
664 asked_switch = path_requires_switch(*backing_file_out, backing_file,
665 file);
666
667 /* Allow switching only if no mismatch. */
668 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
669 mtime)) {
670 printk(KERN_ERR "Switching backing file to '%s'\n",
671 *backing_file_out);
672 err = write_cow_header(file, fd, *backing_file_out,
673 sectorsize, align, &size);
674 if (err) {
675 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
676 goto out_close;
677 }
678 } else {
679 *backing_file_out = backing_file;
680 err = backing_file_mismatch(*backing_file_out, size, mtime);
681 if (err)
682 goto out_close;
683 }
684
685 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
686 bitmap_len_out, data_offset_out);
687
688 return fd;
689 out_close:
690 os_close_file(fd);
691 return err;
692}
693
694static int create_cow_file(char *cow_file, char *backing_file,
695 struct openflags flags,
696 int sectorsize, int alignment, int *bitmap_offset_out,
697 unsigned long *bitmap_len_out, int *data_offset_out)
698{
699 int err, fd;
700
701 flags.c = 1;
702 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
703 if (fd < 0) {
704 err = fd;
705 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
706 cow_file, -err);
707 goto out;
708 }
709
710 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
711 bitmap_offset_out, bitmap_len_out,
712 data_offset_out);
713 if (!err)
714 return fd;
715 os_close_file(fd);
716 out:
717 return err;
718}
719
5f75a4f8 720static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 721{
7d314e34
PBG
722 os_close_file(ubd_dev->fd);
723 if(ubd_dev->cow.file == NULL)
1da177e4
LT
724 return;
725
7d314e34
PBG
726 os_close_file(ubd_dev->cow.fd);
727 vfree(ubd_dev->cow.bitmap);
728 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
729}
730
7d314e34 731static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
732{
733 struct openflags flags;
734 char **back_ptr;
735 int err, create_cow, *create_ptr;
0bf16bff 736 int fd;
1da177e4 737
7d314e34 738 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 739 create_cow = 0;
7d314e34
PBG
740 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
741 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
742
743 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
744 back_ptr, &ubd_dev->cow.bitmap_offset,
745 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 746 create_ptr);
1da177e4 747
0bf16bff
PBG
748 if((fd == -ENOENT) && create_cow){
749 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
a43c8316 750 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
7d314e34
PBG
751 &ubd_dev->cow.bitmap_offset,
752 &ubd_dev->cow.bitmap_len,
753 &ubd_dev->cow.data_offset);
0bf16bff 754 if(fd >= 0){
1da177e4 755 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 756 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
757 }
758 }
759
0bf16bff 760 if(fd < 0){
7d314e34 761 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
762 -fd);
763 return fd;
1da177e4 764 }
0bf16bff 765 ubd_dev->fd = fd;
1da177e4 766
7d314e34 767 if(ubd_dev->cow.file != NULL){
1da177e4 768 err = -ENOMEM;
da2486ba 769 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 770 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
771 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
772 goto error;
773 }
774 flush_tlb_kernel_vm();
775
7d314e34
PBG
776 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
777 ubd_dev->cow.bitmap_offset,
778 ubd_dev->cow.bitmap_len);
1da177e4
LT
779 if(err < 0)
780 goto error;
781
7d314e34 782 flags = ubd_dev->openflags;
1da177e4 783 flags.w = 0;
7d314e34 784 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 785 NULL, NULL, NULL, NULL);
1da177e4 786 if(err < 0) goto error;
7d314e34 787 ubd_dev->cow.fd = err;
1da177e4 788 }
dc764e50 789 return 0;
1da177e4 790 error:
7d314e34 791 os_close_file(ubd_dev->fd);
dc764e50 792 return err;
1da177e4
LT
793}
794
2e3f5251
JD
795static void ubd_device_release(struct device *dev)
796{
8691b97b 797 struct ubd *ubd_dev = dev_get_drvdata(dev);
2e3f5251 798
4e6da0fe 799 blk_mq_free_tag_set(&ubd_dev->tag_set);
2e3f5251
JD
800 *ubd_dev = ((struct ubd) DEFAULT_UBD);
801}
802
ef3ba87c
CO
803static ssize_t serial_show(struct device *dev,
804 struct device_attribute *attr, char *buf)
805{
806 struct gendisk *disk = dev_to_disk(dev);
807 struct ubd *ubd_dev = disk->private_data;
808
809 if (!ubd_dev)
810 return 0;
811
812 return sprintf(buf, "%s", ubd_dev->serial);
813}
814
815static DEVICE_ATTR_RO(serial);
816
817static struct attribute *ubd_attrs[] = {
818 &dev_attr_serial.attr,
819 NULL,
820};
821
822static umode_t ubd_attrs_are_visible(struct kobject *kobj,
823 struct attribute *a, int n)
824{
825 return a->mode;
826}
827
828static const struct attribute_group ubd_attr_group = {
829 .attrs = ubd_attrs,
830 .is_visible = ubd_attrs_are_visible,
831};
832
833static const struct attribute_group *ubd_attr_groups[] = {
834 &ubd_attr_group,
835 NULL,
836};
837
/*
 * Round n up to the next multiple of SECTOR_SIZE (which must be a power of
 * two). The argument is parenthesized so that any expression can be passed.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
1da177e4 839
4e6da0fe
RW
840static const struct blk_mq_ops ubd_mq_ops = {
841 .queue_rq = ubd_queue_rq,
4e6da0fe
RW
842};
843
f28169d2 844static int ubd_add(int n, char **error_out)
1da177e4 845{
7d314e34 846 struct ubd *ubd_dev = &ubd_devs[n];
5d6789ce
CH
847 struct queue_limits lim = {
848 .max_segments = MAX_SG,
849 .seg_boundary_mask = PAGE_SIZE - 1,
850 };
35efb594 851 struct gendisk *disk;
f28169d2 852 int err = 0;
1da177e4 853
7d314e34 854 if(ubd_dev->file == NULL)
ec7cf783 855 goto out;
1da177e4 856
58ebe3e7
CH
857 if (ubd_dev->cow.file)
858 lim.max_hw_sectors = 8 * sizeof(long);
859 if (!ubd_dev->no_trim) {
860 lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
861 lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
862 }
863
7d314e34 864 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
865 if(err < 0){
866 *error_out = "Couldn't determine size of device's file";
80c13749 867 goto out;
f28169d2 868 }
1da177e4 869
fb5d1d38
CH
870 err = ubd_open_dev(ubd_dev);
871 if (err) {
872 pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
873 'a' + n, ubd_dev->file, -err);
874 goto out;
875 }
876
7d314e34 877 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 878
4e6da0fe
RW
879 ubd_dev->tag_set.ops = &ubd_mq_ops;
880 ubd_dev->tag_set.queue_depth = 64;
881 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
882 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
4e6da0fe
RW
883 ubd_dev->tag_set.driver_data = ubd_dev;
884 ubd_dev->tag_set.nr_hw_queues = 1;
a0044bdf 885
4e6da0fe
RW
886 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
887 if (err)
fb5d1d38 888 goto out_close;
4e6da0fe 889
5d6789ce 890 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
35efb594
CH
891 if (IS_ERR(disk)) {
892 err = PTR_ERR(disk);
aea05eb5 893 goto out_cleanup_tags;
62f96cb0 894 }
4e6da0fe 895
b8b364d2 896 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
f3c17dcc 897 blk_queue_write_cache(disk->queue, true, false);
0267e9ca
CH
898 disk->major = UBD_MAJOR;
899 disk->first_minor = n << UBD_SHIFT;
900 disk->minors = 1 << UBD_SHIFT;
901 disk->fops = &ubd_blops;
902 set_capacity(disk, ubd_dev->size / 512);
903 sprintf(disk->disk_name, "ubd%c", 'a' + n);
904 disk->private_data = ubd_dev;
5e4e1ff8 905 set_disk_ro(disk, !ubd_dev->openflags.w);
0267e9ca
CH
906
907 ubd_dev->pdev.id = n;
908 ubd_dev->pdev.name = DRIVER_NAME;
909 ubd_dev->pdev.dev.release = ubd_device_release;
910 dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
911 platform_device_register(&ubd_dev->pdev);
912
913 err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
66638f16
LC
914 if (err)
915 goto out_cleanup_disk;
916
35efb594 917 return 0;
62f96cb0 918
66638f16 919out_cleanup_disk:
8b9ab626 920 put_disk(disk);
4e6da0fe
RW
921out_cleanup_tags:
922 blk_mq_free_tag_set(&ubd_dev->tag_set);
fb5d1d38
CH
923out_close:
924 ubd_close_dev(ubd_dev);
35efb594
CH
925out:
926 return err;
1da177e4
LT
927}
928
f28169d2 929static int ubd_config(char *str, char **error_out)
1da177e4 930{
e7f6552f 931 int n, ret;
1da177e4 932
f28169d2
JD
933 /* This string is possibly broken up and stored, so it's only
934 * freed if ubd_setup_common fails, or if only general options
935 * were set.
936 */
970d6e3a 937 str = kstrdup(str, GFP_KERNEL);
e7f6552f 938 if (str == NULL) {
f28169d2
JD
939 *error_out = "Failed to allocate memory";
940 return -ENOMEM;
1da177e4 941 }
f28169d2
JD
942
943 ret = ubd_setup_common(str, &n, error_out);
944 if (ret)
e7f6552f 945 goto err_free;
f28169d2 946
e7f6552f
PBG
947 if (n == -1) {
948 ret = 0;
d8d7c28e 949 goto err_free;
1da177e4 950 }
1da177e4 951
dc764e50 952 mutex_lock(&ubd_lock);
f28169d2 953 ret = ubd_add(n, error_out);
e7f6552f 954 if (ret)
7d314e34 955 ubd_devs[n].file = NULL;
dc764e50 956 mutex_unlock(&ubd_lock);
1da177e4 957
e7f6552f 958out:
dc764e50 959 return ret;
e7f6552f
PBG
960
961err_free:
962 kfree(str);
963 goto out;
1da177e4
LT
964}
965
966static int ubd_get_config(char *name, char *str, int size, char **error_out)
967{
7d314e34 968 struct ubd *ubd_dev;
1da177e4
LT
969 int n, len = 0;
970
971 n = parse_unit(&name);
972 if((n >= MAX_DEV) || (n < 0)){
973 *error_out = "ubd_get_config : device number out of range";
dc764e50 974 return -1;
1da177e4
LT
975 }
976
7d314e34 977 ubd_dev = &ubd_devs[n];
d7fb2c38 978 mutex_lock(&ubd_lock);
1da177e4 979
7d314e34 980 if(ubd_dev->file == NULL){
1da177e4
LT
981 CONFIG_CHUNK(str, size, len, "", 1);
982 goto out;
983 }
984
7d314e34 985 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 986
7d314e34 987 if(ubd_dev->cow.file != NULL){
1da177e4 988 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 989 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
990 }
991 else CONFIG_CHUNK(str, size, len, "", 1);
992
993 out:
d7fb2c38 994 mutex_unlock(&ubd_lock);
dc764e50 995 return len;
1da177e4
LT
996}
997
29d56cfe
JD
998static int ubd_id(char **str, int *start_out, int *end_out)
999{
dc764e50 1000 int n;
29d56cfe
JD
1001
1002 n = parse_unit(str);
dc764e50
JD
1003 *start_out = 0;
1004 *end_out = MAX_DEV - 1;
1005 return n;
29d56cfe
JD
1006}
1007
f28169d2 1008static int ubd_remove(int n, char **error_out)
1da177e4 1009{
7d314e34 1010 struct ubd *ubd_dev;
29d56cfe 1011 int err = -ENODEV;
1da177e4 1012
d7fb2c38 1013 mutex_lock(&ubd_lock);
1da177e4 1014
7d314e34 1015 ubd_dev = &ubd_devs[n];
1da177e4 1016
7d314e34 1017 if(ubd_dev->file == NULL)
29d56cfe 1018 goto out;
1da177e4 1019
32621ad7 1020 if (ubd_dev->disk) {
fb5d1d38
CH
1021 /* you cannot remove a open disk */
1022 err = -EBUSY;
1023 if (disk_openers(ubd_dev->disk))
1024 goto out;
1025
32621ad7 1026 del_gendisk(ubd_dev->disk);
fb5d1d38 1027 ubd_close_dev(ubd_dev);
32621ad7 1028 put_disk(ubd_dev->disk);
1da177e4
LT
1029 }
1030
1da177e4 1031 err = 0;
2e3f5251 1032 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1033out:
d7fb2c38 1034 mutex_unlock(&ubd_lock);
29d56cfe 1035 return err;
1da177e4
LT
1036}
1037
f28169d2 1038/* All these are called by mconsole in process context and without
b8831a1d 1039 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1040 */
1da177e4 1041static struct mc_device ubd_mc = {
84f48d4f 1042 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1043 .name = "ubd",
1044 .config = ubd_config,
dc764e50 1045 .get_config = ubd_get_config,
29d56cfe 1046 .id = ubd_id,
1da177e4
LT
1047 .remove = ubd_remove,
1048};
1049
d8d7c28e 1050static int __init ubd_mc_init(void)
1da177e4
LT
1051{
1052 mconsole_register_dev(&ubd_mc);
1053 return 0;
1054}
1055
1056__initcall(ubd_mc_init);
1057
d8d7c28e
PBG
1058static int __init ubd0_init(void)
1059{
1060 struct ubd *ubd_dev = &ubd_devs[0];
1061
b8831a1d 1062 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1063 if(ubd_dev->file == NULL)
1064 ubd_dev->file = "root_fs";
b8831a1d
JD
1065 mutex_unlock(&ubd_lock);
1066
dc764e50 1067 return 0;
d8d7c28e
PBG
1068}
1069
1070__initcall(ubd0_init);
1071
b8831a1d 1072/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1073static struct platform_driver ubd_driver = {
1074 .driver = {
1075 .name = DRIVER_NAME,
1076 },
1da177e4
LT
1077};
1078
d8d7c28e 1079static int __init ubd_init(void)
1da177e4 1080{
f28169d2
JD
1081 char *error;
1082 int i, err;
1da177e4 1083
792dd4fc 1084 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1085 return -1;
1086
6da2ec56
KC
1087 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1088 sizeof(struct io_thread_req *),
1089 GFP_KERNEL
f88f0bdf
AI
1090 );
1091 irq_remainder = 0;
1092
1093 if (irq_req_buffer == NULL) {
1094 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1095 return -1;
1096 }
6da2ec56
KC
1097 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1098 sizeof(struct io_thread_req *),
1099 GFP_KERNEL
f88f0bdf
AI
1100 );
1101
1102 io_remainder = 0;
1103
1104 if (io_req_buffer == NULL) {
1105 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1106 return -1;
1107 }
3ae5eaec 1108 platform_driver_register(&ubd_driver);
dc764e50 1109 mutex_lock(&ubd_lock);
f28169d2
JD
1110 for (i = 0; i < MAX_DEV; i++){
1111 err = ubd_add(i, &error);
1112 if(err)
1113 printk(KERN_ERR "Failed to initialize ubd device %d :"
1114 "%s\n", i, error);
1115 }
dc764e50 1116 mutex_unlock(&ubd_lock);
1da177e4
LT
1117 return 0;
1118}
1119
1120late_initcall(ubd_init);
1121
d8d7c28e 1122static int __init ubd_driver_init(void){
91acb21f
JD
1123 unsigned long stack;
1124 int err;
1125
1126 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1127 if(global_openflags.s){
1128 printk(KERN_INFO "ubd: Synchronous mode\n");
1129 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1130 * enough. So use anyway the io thread. */
1131 }
2fcb4090 1132 stack = alloc_stack(0, 0);
558f9b2f 1133 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
91acb21f 1134 if(io_pid < 0){
6c29256c 1135 printk(KERN_ERR
91acb21f
JD
1136 "ubd : Failed to start I/O thread (errno = %d) - "
1137 "falling back to synchronous I/O\n", -io_pid);
1138 io_pid = -1;
dc764e50 1139 return 0;
91acb21f 1140 }
6c29256c 1141 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1142 0, "ubd", ubd_devs);
36d46a59 1143 if(err < 0)
91acb21f 1144 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1145 return 0;
91acb21f
JD
1146}
1147
1148device_initcall(ubd_driver_init);
1149
91acb21f
JD
1150static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1151 __u64 *cow_offset, unsigned long *bitmap,
1152 __u64 bitmap_offset, unsigned long *bitmap_words,
1153 __u64 bitmap_len)
1da177e4 1154{
a43c8316 1155 __u64 sector = io_offset >> SECTOR_SHIFT;
91acb21f
JD
1156 int i, update_bitmap = 0;
1157
a43c8316 1158 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
91acb21f
JD
1159 if(cow_mask != NULL)
1160 ubd_set_bit(i, (unsigned char *) cow_mask);
1161 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1162 continue;
1da177e4 1163
91acb21f
JD
1164 update_bitmap = 1;
1165 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1166 }
1167
1168 if(!update_bitmap)
1169 return;
1da177e4 1170
91acb21f 1171 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1172
91acb21f
JD
1173 /* This takes care of the case where we're exactly at the end of the
1174 * device, and *cow_offset + 1 is off the end. So, just back it up
1175 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1176 * for the original diagnosis.
1177 */
6d074242
JO
1178 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1179 sizeof(unsigned long)) - 1))
91acb21f
JD
1180 (*cow_offset)--;
1181
1182 bitmap_words[0] = bitmap[*cow_offset];
1183 bitmap_words[1] = bitmap[*cow_offset + 1];
1184
1185 *cow_offset *= sizeof(unsigned long);
1186 *cow_offset += bitmap_offset;
1187}
1188
fc6b6a87
GKB
1189static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1190 unsigned long offset, unsigned long *bitmap,
91acb21f
JD
1191 __u64 bitmap_offset, __u64 bitmap_len)
1192{
fc6b6a87 1193 __u64 sector = offset >> SECTOR_SHIFT;
91acb21f
JD
1194 int i;
1195
fc6b6a87 1196 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
91acb21f
JD
1197 panic("Operation too long");
1198
a43c8316 1199 if (req_op(req->req) == REQ_OP_READ) {
fc6b6a87 1200 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
91acb21f 1201 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1202 ubd_set_bit(i, (unsigned char *)
fc6b6a87
GKB
1203 &segment->sector_mask);
1204 }
1205 } else {
1206 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1207 &segment->cow_offset, bitmap, bitmap_offset,
1208 segment->bitmap_words, bitmap_len);
1209 }
1210}
1211
1212static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1213 struct request *req)
1214{
1215 struct bio_vec bvec;
1216 struct req_iterator iter;
1217 int i = 0;
1218 unsigned long byte_offset = io_req->offset;
7ee1de6e 1219 enum req_op op = req_op(req);
fc6b6a87
GKB
1220
1221 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1222 io_req->io_desc[0].buffer = NULL;
1223 io_req->io_desc[0].length = blk_rq_bytes(req);
1224 } else {
1225 rq_for_each_segment(bvec, req, iter) {
1226 BUG_ON(i >= io_req->desc_cnt);
1227
25d84545 1228 io_req->io_desc[i].buffer = bvec_virt(&bvec);
fc6b6a87
GKB
1229 io_req->io_desc[i].length = bvec.bv_len;
1230 i++;
1231 }
1232 }
1233
1234 if (dev->cow.file) {
1235 for (i = 0; i < io_req->desc_cnt; i++) {
1236 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1237 dev->cow.bitmap, dev->cow.bitmap_offset,
1238 dev->cow.bitmap_len);
1239 byte_offset += io_req->io_desc[i].length;
dc764e50 1240 }
fc6b6a87 1241
91acb21f 1242 }
1da177e4
LT
1243}
1244
fc6b6a87
GKB
1245static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1246 int desc_cnt)
1da177e4 1247{
ecb0a83e 1248 struct io_thread_req *io_req;
fc6b6a87 1249 int i;
1da177e4 1250
fc6b6a87
GKB
1251 io_req = kmalloc(sizeof(*io_req) +
1252 (desc_cnt * sizeof(struct io_desc)),
1253 GFP_ATOMIC);
ecb0a83e 1254 if (!io_req)
fc6b6a87 1255 return NULL;
805f11a0
RW
1256
1257 io_req->req = req;
ecb0a83e
CH
1258 if (dev->cow.file)
1259 io_req->fds[0] = dev->cow.fd;
1260 else
1261 io_req->fds[0] = dev->fd;
0033dfd9 1262 io_req->error = 0;
53766def
AI
1263 io_req->sectorsize = SECTOR_SIZE;
1264 io_req->fds[1] = dev->fd;
fc6b6a87 1265 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
53766def
AI
1266 io_req->offsets[0] = 0;
1267 io_req->offsets[1] = dev->cow.data_offset;
1268
fc6b6a87
GKB
1269 for (i = 0 ; i < desc_cnt; i++) {
1270 io_req->io_desc[i].sector_mask = 0;
1271 io_req->io_desc[i].cow_offset = -1;
1272 }
1273
1274 return io_req;
1275}
1276
1277static int ubd_submit_request(struct ubd *dev, struct request *req)
1278{
1279 int segs = 0;
1280 struct io_thread_req *io_req;
1281 int ret;
7ee1de6e 1282 enum req_op op = req_op(req);
fc6b6a87
GKB
1283
1284 if (op == REQ_OP_FLUSH)
1285 segs = 0;
1286 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1287 segs = 1;
1288 else
1289 segs = blk_rq_nr_phys_segments(req);
1290
1291 io_req = ubd_alloc_req(dev, req, segs);
1292 if (!io_req)
1293 return -ENOMEM;
1294
1295 io_req->desc_cnt = segs;
1296 if (segs)
1297 ubd_map_req(dev, io_req, req);
53766def 1298
ecb0a83e
CH
1299 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1300 if (ret != sizeof(io_req)) {
1301 if (ret != -EAGAIN)
1302 pr_err("write to io thread failed: %d\n", -ret);
bc1d72e7 1303 kfree(io_req);
bc1d72e7 1304 }
ecb0a83e 1305 return ret;
bc1d72e7
RW
1306}
1307
4e6da0fe
RW
1308static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1309 const struct blk_mq_queue_data *bd)
1da177e4 1310{
6961cd4d 1311 struct ubd *ubd_dev = hctx->queue->queuedata;
4e6da0fe 1312 struct request *req = bd->rq;
53766def 1313 int ret = 0, res = BLK_STS_OK;
a0044bdf 1314
4e6da0fe
RW
1315 blk_mq_start_request(req);
1316
6961cd4d
JA
1317 spin_lock_irq(&ubd_dev->lock);
1318
53766def 1319 switch (req_op(req)) {
53766def 1320 case REQ_OP_FLUSH:
53766def
AI
1321 case REQ_OP_READ:
1322 case REQ_OP_WRITE:
50109b5a
AI
1323 case REQ_OP_DISCARD:
1324 case REQ_OP_WRITE_ZEROES:
fc6b6a87 1325 ret = ubd_submit_request(ubd_dev, req);
50109b5a 1326 break;
53766def
AI
1327 default:
1328 WARN_ON_ONCE(1);
1329 res = BLK_STS_NOTSUPP;
4e6da0fe 1330 }
53766def 1331
6961cd4d
JA
1332 spin_unlock_irq(&ubd_dev->lock);
1333
d848074b
AI
1334 if (ret < 0) {
1335 if (ret == -ENOMEM)
1336 res = BLK_STS_RESOURCE;
1337 else
1338 res = BLK_STS_DEV_RESOURCE;
1339 }
6961cd4d 1340
53766def 1341 return res;
1da177e4
LT
1342}
1343
a885c8c4
CH
1344static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1345{
7d314e34 1346 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1347
1348 geo->heads = 128;
1349 geo->sectors = 32;
7d314e34 1350 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1351 return 0;
1352}
1353
05bdb996 1354static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4
LT
1355 unsigned int cmd, unsigned long arg)
1356{
a625c998 1357 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1358 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1359
1360 switch (cmd) {
1da177e4 1361 struct cdrom_volctrl volume;
1da177e4 1362 case HDIO_GET_IDENTITY:
73855e13
BZ
1363 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1364 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1365 ubd_id[ATA_ID_HEADS] = 128;
1366 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1367 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1368 sizeof(ubd_id)))
dc764e50
JD
1369 return -EFAULT;
1370 return 0;
b8831a1d 1371
1da177e4
LT
1372 case CDROMVOLREAD:
1373 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1374 return -EFAULT;
1da177e4
LT
1375 volume.channel0 = 255;
1376 volume.channel1 = 255;
1377 volume.channel2 = 255;
1378 volume.channel3 = 255;
1379 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1380 return -EFAULT;
1381 return 0;
1da177e4 1382 }
dc764e50 1383 return -EINVAL;
1da177e4
LT
1384}
1385
a43c8316
AI
1386static int map_error(int error_code)
1387{
1388 switch (error_code) {
1389 case 0:
1390 return BLK_STS_OK;
1391 case ENOSYS:
1392 case EOPNOTSUPP:
1393 return BLK_STS_NOTSUPP;
1394 case ENOSPC:
1395 return BLK_STS_NOSPC;
1396 }
1397 return BLK_STS_IOERR;
1398}
1399
a41421ed
AI
1400/*
1401 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1402 *
1403 * The following functions are part of UML hypervisor code.
1404 * All functions from here onwards are executed as a helper
1405 * thread and are not allowed to execute any kernel functions.
1406 *
1407 * Any communication must occur strictly via shared memory and IPC.
1408 *
1409 * Do not add printks, locks, kernel memory operations, etc - it
1410 * will result in unpredictable behaviour and/or crashes.
1411 */
1412
fc6b6a87 1413static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1da177e4 1414{
91acb21f 1415 int n;
1da177e4 1416
fc6b6a87 1417 if (segment->cow_offset == -1)
a43c8316 1418 return map_error(0);
1da177e4 1419
fc6b6a87
GKB
1420 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1421 sizeof(segment->bitmap_words), segment->cow_offset);
1422 if (n != sizeof(segment->bitmap_words))
a43c8316 1423 return map_error(-n);
1da177e4 1424
a43c8316 1425 return map_error(0);
91acb21f 1426}
1da177e4 1427
fc6b6a87 1428static void do_io(struct io_thread_req *req, struct io_desc *desc)
91acb21f 1429{
50109b5a 1430 char *buf = NULL;
91acb21f
JD
1431 unsigned long len;
1432 int n, nsectors, start, end, bit;
91acb21f
JD
1433 __u64 off;
1434
50109b5a
AI
1435 /* FLUSH is really a special case, we cannot "case" it with others */
1436
a43c8316 1437 if (req_op(req->req) == REQ_OP_FLUSH) {
805f11a0 1438 /* fds[0] is always either the rw image or our cow file */
a41421ed 1439 req->error = map_error(-os_sync_file(req->fds[0]));
805f11a0
RW
1440 return;
1441 }
1442
fc6b6a87 1443 nsectors = desc->length / req->sectorsize;
91acb21f
JD
1444 start = 0;
1445 do {
fc6b6a87 1446 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
91acb21f
JD
1447 end = start;
1448 while((end < nsectors) &&
fc6b6a87 1449 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
91acb21f
JD
1450 end++;
1451
1452 off = req->offset + req->offsets[bit] +
1453 start * req->sectorsize;
1454 len = (end - start) * req->sectorsize;
fc6b6a87
GKB
1455 if (desc->buffer != NULL)
1456 buf = &desc->buffer[start * req->sectorsize];
91acb21f 1457
50109b5a
AI
1458 switch (req_op(req->req)) {
1459 case REQ_OP_READ:
91acb21f
JD
1460 n = 0;
1461 do {
1462 buf = &buf[n];
1463 len -= n;
8c6157b6 1464 n = os_pread_file(req->fds[bit], buf, len, off);
50109b5a 1465 if (n < 0) {
a43c8316 1466 req->error = map_error(-n);
91acb21f
JD
1467 return;
1468 }
1469 } while((n < len) && (n != 0));
1470 if (n < len) memset(&buf[n], 0, len - n);
50109b5a
AI
1471 break;
1472 case REQ_OP_WRITE:
8c6157b6 1473 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f 1474 if(n != len){
a43c8316 1475 req->error = map_error(-n);
91acb21f
JD
1476 return;
1477 }
50109b5a
AI
1478 break;
1479 case REQ_OP_DISCARD:
50109b5a
AI
1480 n = os_falloc_punch(req->fds[bit], off, len);
1481 if (n) {
1482 req->error = map_error(-n);
1483 return;
1484 }
1485 break;
d2a0a616
FD
1486 case REQ_OP_WRITE_ZEROES:
1487 n = os_falloc_zeroes(req->fds[bit], off, len);
1488 if (n) {
1489 req->error = map_error(-n);
1490 return;
1491 }
1492 break;
50109b5a
AI
1493 default:
1494 WARN_ON_ONCE(1);
1495 req->error = BLK_STS_NOTSUPP;
1496 return;
91acb21f
JD
1497 }
1498
1499 start = end;
1500 } while(start < nsectors);
1da177e4 1501
fc6b6a87
GKB
1502 req->offset += len;
1503 req->error = update_bitmap(req, desc);
1da177e4 1504}
91acb21f
JD
1505
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
1509int kernel_fd = -1;
1510
d8d7c28e 1511/* Only changed by the io thread. XXX: currently unused. */
4dc5a328 1512static int io_count;
91acb21f
JD
1513
1514int io_thread(void *arg)
1515{
f88f0bdf 1516 int n, count, written, res;
91acb21f 1517
91d44ff8
RW
1518 os_fix_helper_signals();
1519
91acb21f 1520 while(1){
f88f0bdf
AI
1521 n = bulk_req_safe_read(
1522 kernel_fd,
1523 io_req_buffer,
1524 &io_remainder,
1525 &io_remainder_size,
1526 UBD_REQ_BUFFER_SIZE
1527 );
e355b2f5
GKB
1528 if (n <= 0) {
1529 if (n == -EAGAIN)
f88f0bdf 1530 ubd_read_poll(-1);
e355b2f5
GKB
1531
1532 continue;
91acb21f 1533 }
f88f0bdf
AI
1534
1535 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
fc6b6a87
GKB
1536 struct io_thread_req *req = (*io_req_buffer)[count];
1537 int i;
1538
f88f0bdf 1539 io_count++;
fc6b6a87
GKB
1540 for (i = 0; !req->error && i < req->desc_cnt; i++)
1541 do_io(req, &(req->io_desc[i]));
1542
f88f0bdf
AI
1543 }
1544
1545 written = 0;
1546
1547 do {
6e682d53
GKB
1548 res = os_write_file(kernel_fd,
1549 ((char *) io_req_buffer) + written,
1550 n - written);
ff6a1798 1551 if (res >= 0) {
f88f0bdf 1552 written += res;
f88f0bdf
AI
1553 }
1554 if (written < n) {
1555 ubd_write_poll(-1);
1556 }
1557 } while (written < n);
91acb21f 1558 }
91acb21f 1559
1b57e9c2
JD
1560 return 0;
1561}