Merge tag 'sched_ext-for-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj...
[linux-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
dbddf429 1// SPDX-License-Identifier: GPL-2.0
6c29256c 2/*
a41421ed 3 * Copyright (C) 2018 Cambridge Greys Ltd
f88f0bdf 4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
1da177e4 5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
1da177e4
LT
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
1da177e4
LT
22#define UBD_SHIFT 4
23
8ea3c06a
AV
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
4e6da0fe 27#include <linux/blk-mq.h>
8ea3c06a
AV
28#include <linux/ata.h>
29#include <linux/hdreg.h>
b81e0c23 30#include <linux/major.h>
8ea3c06a
AV
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
37185b33 39#include <kern_util.h>
1da177e4 40#include "mconsole_kern.h"
37185b33
AV
41#include <init.h>
42#include <irq_kern.h>
8ea3c06a 43#include "ubd.h"
37185b33 44#include <os.h>
1da177e4
LT
45#include "cow.h"
46
a43c8316
AI
47/* Max request size is determined by sector mask - 32K */
48#define UBD_MAX_REQUEST (8 * sizeof(long))
1da177e4 49
fc6b6a87
GKB
/*
 * One piece of an I/O request, stored in the trailing array of
 * struct io_thread_req.  The code that fills in and consumes these fields
 * is not in this part of the file, so the per-field notes are hedged.
 */
struct io_desc {
	char *buffer;			/* presumably the data for this piece */
	unsigned long length;		/* presumably the size of @buffer */
	unsigned long sector_mask;	/* NOTE(review): looks like one bit per sector - confirm */
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
};
57
1da177e4 58struct io_thread_req {
62f96cb0 59 struct request *req;
1da177e4
LT
60 int fds[2];
61 unsigned long offsets[2];
62 unsigned long long offset;
1da177e4 63 int sectorsize;
1da177e4 64 int error;
fc6b6a87
GKB
65
66 int desc_cnt;
67 /* io_desc has to be the last element of the struct */
68 struct io_desc io_desc[];
1da177e4
LT
69};
70
f88f0bdf
AI
71
/*
 * Buffers of request pointers, allocated in ubd_init().
 *
 * The irq_* set is what ubd_intr() passes to bulk_req_safe_read(): the
 * remainder variable is used as raw byte storage for a trailing partial
 * pointer and the size records how many bytes of it are valid.  The io_*
 * set has the same shape; its reader is not in this part of the file.
 */
static struct io_thread_req *(*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

static struct io_thread_req *(*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
79
80
81
/*
 * Test one bit of a byte-addressed bitmap.
 *
 * @bit:  zero-based bit index; bit N lives in byte N / 8 at position N % 8,
 *        least significant bit first.
 * @data: bitmap storage; it is only read.
 *
 * Returns 1 when the bit is set and 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, const unsigned char *data)
{
	const unsigned int bits = sizeof(data[0]) * 8;

	return (data[bit / bits] >> (bit % bits)) & 1;
}
92
/*
 * Set one bit of a byte-addressed bitmap.
 *
 * @bit:  zero-based bit index, using the same layout as ubd_test_bit():
 *        byte bit / 8, position bit % 8, least significant bit first.
 * @data: bitmap storage, modified in place.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	unsigned char *byte = &data[bit / width];

	*byte |= (1 << (bit % width));
}
103/*End stuff from ubd_user.h*/
104
105#define DRIVER_NAME "uml-blkdev"
106
d7fb2c38 107static DEFINE_MUTEX(ubd_lock);
1da177e4 108
05bdb996 109static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4 110 unsigned int cmd, unsigned long arg);
a885c8c4 111static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 112
97d88ac8 113#define MAX_DEV (16)
1da177e4 114
83d5cde4 115static const struct block_device_operations ubd_blops = {
1da177e4 116 .owner = THIS_MODULE,
a625c998 117 .ioctl = ubd_ioctl,
ab0cf1e4 118 .compat_ioctl = blkdev_compat_ptr_ioctl,
a885c8c4 119 .getgeo = ubd_getgeo,
1da177e4
LT
120};
121
1da177e4
LT
122#ifdef CONFIG_BLK_DEV_UBD_SYNC
123#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124 .cl = 1 })
125#else
126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127 .cl = 1 })
128#endif
1da177e4
LT
129static struct openflags global_openflags = OPEN_FLAGS;
130
/* Copy-on-write state of a device; unused when cow.file is NULL. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc'ed copy of the COW bitmap */
	unsigned long bitmap_len;	/* size passed to vmalloc() and the bitmap read */
	int bitmap_offset;		/* file offset the bitmap is read from */
	int data_offset;		/* filled in by cow_sizes()/init_cow_file() */
};
141
a0044bdf
JD
142#define MAX_SG 64
143
1da177e4 144struct ubd {
2a9d32f6
PBG
145 /* name (and fd, below) of the file opened for writing, either the
146 * backing or the cow file. */
1da177e4 147 char *file;
ef3ba87c 148 char *serial;
1da177e4
LT
149 int fd;
150 __u64 size;
151 struct openflags boot_openflags;
152 struct openflags openflags;
84e945e3
PBG
153 unsigned shared:1;
154 unsigned no_cow:1;
50109b5a 155 unsigned no_trim:1;
1da177e4
LT
156 struct cow cow;
157 struct platform_device pdev;
32621ad7 158 struct gendisk *disk;
4e6da0fe 159 struct blk_mq_tag_set tag_set;
62f96cb0 160 spinlock_t lock;
4e6da0fe
RW
161};
162
1da177e4
LT
163#define DEFAULT_COW { \
164 .file = NULL, \
dc764e50
JD
165 .fd = -1, \
166 .bitmap = NULL, \
1da177e4 167 .bitmap_offset = 0, \
dc764e50 168 .data_offset = 0, \
1da177e4
LT
169}
170
171#define DEFAULT_UBD { \
172 .file = NULL, \
ef3ba87c 173 .serial = NULL, \
1da177e4
LT
174 .fd = -1, \
175 .size = -1, \
176 .boot_openflags = OPEN_FLAGS, \
177 .openflags = OPEN_FLAGS, \
dc764e50 178 .no_cow = 0, \
50109b5a 179 .no_trim = 0, \
6c29256c 180 .shared = 0, \
dc764e50 181 .cow = DEFAULT_COW, \
22e65004 182 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
1da177e4
LT
183}
184
b8831a1d 185/* Protected by ubd_lock */
5dc62b1b 186static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 187
4e6da0fe
RW
188static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
189 const struct blk_mq_queue_data *bd);
4e6da0fe 190
1da177e4
LT
191static int fake_ide_setup(char *str)
192{
7eb90f7e 193 pr_warn("The fake_ide option has been removed\n");
dc764e50 194 return 1;
1da177e4 195}
1da177e4
LT
196__setup("fake_ide", fake_ide_setup);
197
198__uml_help(fake_ide_setup,
199"fake_ide\n"
7eb90f7e 200" Obsolete stub.\n\n"
1da177e4
LT
201);
202
203static int parse_unit(char **ptr)
204{
205 char *str = *ptr, *end;
206 int n = -1;
207
208 if(isdigit(*str)) {
209 n = simple_strtoul(str, &end, 0);
210 if(end == str)
dc764e50 211 return -1;
1da177e4
LT
212 *ptr = end;
213 }
97d88ac8 214 else if (('a' <= *str) && (*str <= 'z')) {
1da177e4
LT
215 n = *str - 'a';
216 str++;
217 *ptr = str;
218 }
dc764e50 219 return n;
1da177e4
LT
220}
221
d8d7c28e
PBG
222/* If *index_out == -1 at exit, the passed option was a general one;
223 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
224 * should not be freed on exit.
225 */
f28169d2 226static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 227{
7d314e34 228 struct ubd *ubd_dev;
1da177e4 229 struct openflags flags = global_openflags;
ef3ba87c 230 char *file, *backing_file, *serial;
b8831a1d 231 int n, err = 0, i;
1da177e4
LT
232
233 if(index_out) *index_out = -1;
234 n = *str;
235 if(n == '='){
1da177e4 236 str++;
1da177e4
LT
237 if(!strcmp(str, "sync")){
238 global_openflags = of_sync(global_openflags);
9ca55299 239 return err;
1da177e4 240 }
b8831a1d 241
7eb90f7e
CH
242 pr_warn("fake major not supported any more\n");
243 return 0;
1da177e4
LT
244 }
245
246 n = parse_unit(&str);
247 if(n < 0){
f28169d2
JD
248 *error_out = "Couldn't parse device number";
249 return -EINVAL;
1da177e4
LT
250 }
251 if(n >= MAX_DEV){
f28169d2
JD
252 *error_out = "Device number out of range";
253 return 1;
1da177e4
LT
254 }
255
f28169d2 256 err = -EBUSY;
d7fb2c38 257 mutex_lock(&ubd_lock);
1da177e4 258
7d314e34
PBG
259 ubd_dev = &ubd_devs[n];
260 if(ubd_dev->file != NULL){
f28169d2 261 *error_out = "Device is already configured";
1da177e4
LT
262 goto out;
263 }
264
265 if (index_out)
266 *index_out = n;
267
f28169d2 268 err = -EINVAL;
50109b5a 269 for (i = 0; i < sizeof("rscdt="); i++) {
1da177e4
LT
270 switch (*str) {
271 case 'r':
272 flags.w = 0;
273 break;
274 case 's':
275 flags.s = 1;
276 break;
277 case 'd':
7d314e34 278 ubd_dev->no_cow = 1;
1da177e4 279 break;
6c29256c 280 case 'c':
7d314e34 281 ubd_dev->shared = 1;
6c29256c 282 break;
50109b5a
AI
283 case 't':
284 ubd_dev->no_trim = 1;
285 break;
1da177e4
LT
286 case '=':
287 str++;
288 goto break_loop;
289 default:
f28169d2 290 *error_out = "Expected '=' or flag letter "
50109b5a 291 "(r, s, c, t or d)";
1da177e4
LT
292 goto out;
293 }
294 str++;
295 }
296
f28169d2
JD
297 if (*str == '=')
298 *error_out = "Too many flags specified";
299 else
300 *error_out = "Missing '='";
1da177e4
LT
301 goto out;
302
303break_loop:
ef3ba87c
CO
304 file = strsep(&str, ",:");
305 if (*file == '\0')
306 file = NULL;
1da177e4 307
ef3ba87c 308 backing_file = strsep(&str, ",:");
94c41b3a 309 if (backing_file && *backing_file == '\0')
ef3ba87c 310 backing_file = NULL;
1da177e4 311
ef3ba87c 312 serial = strsep(&str, ",:");
94c41b3a 313 if (serial && *serial == '\0')
ef3ba87c
CO
314 serial = NULL;
315
316 if (backing_file && ubd_dev->no_cow) {
317 *error_out = "Can't specify both 'd' and a cow file";
318 goto out;
1da177e4 319 }
ef3ba87c 320
f28169d2 321 err = 0;
ef3ba87c 322 ubd_dev->file = file;
7d314e34 323 ubd_dev->cow.file = backing_file;
ef3ba87c 324 ubd_dev->serial = serial;
7d314e34 325 ubd_dev->boot_openflags = flags;
1da177e4 326out:
d7fb2c38 327 mutex_unlock(&ubd_lock);
f28169d2 328 return err;
1da177e4
LT
329}
330
331static int ubd_setup(char *str)
332{
f28169d2
JD
333 char *error;
334 int err;
335
336 err = ubd_setup_common(str, NULL, &error);
337 if(err)
338 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
339 "%s\n", str, error);
340 return 1;
1da177e4
LT
341}
342
343__setup("ubd", ubd_setup);
344__uml_help(ubd_setup,
ef3ba87c 345"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
1da177e4
LT
346" This is used to associate a device with a file in the underlying\n"
347" filesystem. When specifying two filenames, the first one is the\n"
348" COW name and the second is the backing file name. As separator you can\n"
349" use either a ':' or a ',': the first one allows writing things like;\n"
350" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
351" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 352" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
353" a COW file or a backing file. To override this detection, add the 'd'\n"
354" flag:\n"
355" ubd0d=BackingFile\n"
356" Usually, there is a filesystem in the file, but \n"
357" that's not required. Swap devices containing swap files can be\n"
358" specified like this. Also, a file which doesn't contain a\n"
359" filesystem can have its contents read in the virtual \n"
360" machine by running 'dd' on the device. <n> must be in the range\n"
361" 0 to 7. Appending an 'r' to the number will cause that device\n"
362" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
363" an 's' will cause data to be written to disk on the host immediately.\n"
364" 'c' will cause the device to be treated as being shared between multiple\n"
365" UMLs and file locking will be turned off - this is appropriate for a\n"
366" cluster filesystem and inappropriate at almost all other times.\n\n"
50109b5a 367" 't' will disable trim/discard support on the device (enabled by default).\n\n"
ef3ba87c
CO
368" An optional device serial number can be exposed using the serial parameter\n"
369" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
370" useful when a unique number should be given to the device. Note when\n"
371" specifying a label, the filename2 must be also presented. It can be\n"
372" an empty string, in which case the backing file is not used:\n"
373" ubd0=File,,Serial\n"
1da177e4
LT
374);
375
8299ca5c 376static int udb_setup(char *str)
1da177e4
LT
377{
378 printk("udb%s specified on command line is almost certainly a ubd -> "
379 "udb TYPO\n", str);
dc764e50 380 return 1;
1da177e4
LT
381}
382
383__setup("udb", udb_setup);
384__uml_help(udb_setup,
385"udb\n"
0894e27e
JD
386" This option is here solely to catch ubd -> udb typos, which can be\n"
387" to impossible to catch visually unless you specifically look for\n"
388" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
389" in the boot output.\n\n"
390);
391
/*
 * Descriptor on which completed request pointers are read back.  Only
 * changed by ubd_driver_init, which is an initcall.
 */
static int thread_fd = -1;
a0044bdf 394
f88f0bdf
AI
395/* Function to read several request pointers at a time
396* handling fractional reads if (and as) needed
397*/
398
399static int bulk_req_safe_read(
400 int fd,
401 struct io_thread_req * (*request_buffer)[],
402 struct io_thread_req **remainder,
403 int *remainder_size,
404 int max_recs
405 )
406{
407 int n = 0;
408 int res = 0;
409
410 if (*remainder_size > 0) {
411 memmove(
412 (char *) request_buffer,
413 (char *) remainder, *remainder_size
414 );
415 n = *remainder_size;
416 }
417
418 res = os_read_file(
419 fd,
420 ((char *) request_buffer) + *remainder_size,
421 sizeof(struct io_thread_req *)*max_recs
422 - *remainder_size
423 );
424 if (res > 0) {
425 n += res;
426 if ((n % sizeof(struct io_thread_req *)) > 0) {
427 /*
428 * Read somehow returned not a multiple of dword
429 * theoretically possible, but never observed in the
430 * wild, so read routine must be able to handle it
431 */
432 *remainder_size = n % sizeof(struct io_thread_req *);
433 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
434 memmove(
435 remainder,
436 ((char *) request_buffer) +
437 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
438 *remainder_size
439 );
440 n = n - *remainder_size;
441 }
442 } else {
443 n = res;
444 }
445 return n;
446}
447
5db755fb 448static void ubd_end_request(struct io_thread_req *io_req)
1da177e4 449{
31ade7d4
CH
450 if (io_req->error == BLK_STS_NOTSUPP) {
451 if (req_op(io_req->req) == REQ_OP_DISCARD)
73e3715e 452 blk_queue_disable_discard(io_req->req->q);
31ade7d4 453 else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
73e3715e 454 blk_queue_disable_write_zeroes(io_req->req->q);
a0044bdf 455 }
5db755fb
CH
456 blk_mq_end_request(io_req->req, io_req->error);
457 kfree(io_req);
1da177e4
LT
458}
459
7bea96fd 460static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 461{
5db755fb
CH
462 int len, i;
463
464 while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
465 &irq_remainder, &irq_remainder_size,
466 UBD_REQ_BUFFER_SIZE)) >= 0) {
467 for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
468 ubd_end_request((*irq_req_buffer)[i]);
469 }
470
471 if (len < 0 && len != -EAGAIN)
472 pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
dc764e50 473 return IRQ_HANDLED;
91acb21f 474}
09ace81c 475
91acb21f
JD
/*
 * Pid returned by start_io_thread(), or -1 when no I/O thread is running.
 * Written in ubd_driver_init, which is an initcall.
 */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
486
d8d7c28e 487static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
488{
489 char *file;
85356398
RW
490 int fd;
491 int err;
492
493 __u32 version;
494 __u32 align;
495 char *backing_file;
853bc0ab 496 time64_t mtime;
85356398
RW
497 unsigned long long size;
498 int sector_size;
499 int bitmap_offset;
500
501 if (ubd_dev->file && ubd_dev->cow.file) {
502 file = ubd_dev->cow.file;
503
504 goto out;
505 }
506
d4afcba9 507 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
508 if (fd < 0)
509 return fd;
510
511 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
512 &mtime, &size, &sector_size, &align, &bitmap_offset);
513 os_close_file(fd);
1da177e4 514
85356398
RW
515 if(err == -EINVAL)
516 file = ubd_dev->file;
517 else
518 file = backing_file;
519
520out:
dc764e50 521 return os_file_size(file, size_out);
1da177e4
LT
522}
523
5dc62b1b
WC
/*
 * Read @len bytes of COW bitmap from @fd at @offset into @buf.
 * Returns 0 on success or the negative value from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return ret < 0 ? ret : 0;
}
534
853bc0ab 535static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
5dc62b1b 536{
853bc0ab 537 time64_t modtime;
5dc62b1b
WC
538 unsigned long long actual;
539 int err;
540
541 err = os_file_modtime(file, &modtime);
542 if (err < 0) {
543 printk(KERN_ERR "Failed to get modification time of backing "
544 "file \"%s\", err = %d\n", file, -err);
545 return err;
546 }
547
548 err = os_file_size(file, &actual);
549 if (err < 0) {
550 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
551 "err = %d\n", file, -err);
552 return err;
553 }
554
555 if (actual != size) {
556 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
557 * the typecast.*/
558 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
559 "vs backing file\n", (unsigned long long) size, actual);
560 return -EINVAL;
561 }
562 if (modtime != mtime) {
853bc0ab 563 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
5dc62b1b
WC
564 "backing file\n", mtime, modtime);
565 return -EINVAL;
566 }
567 return 0;
568}
569
570static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
571{
572 struct uml_stat buf1, buf2;
573 int err;
574
575 if (from_cmdline == NULL)
576 return 0;
577 if (!strcmp(from_cmdline, from_cow))
578 return 0;
579
580 err = os_stat_file(from_cmdline, &buf1);
581 if (err < 0) {
582 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
583 -err);
584 return 0;
585 }
586 err = os_stat_file(from_cow, &buf2);
587 if (err < 0) {
588 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
589 -err);
590 return 1;
591 }
592 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
593 return 0;
594
595 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
596 "\"%s\" specified in COW header of \"%s\"\n",
597 from_cmdline, from_cow, cow);
598 return 1;
599}
600
601static int open_ubd_file(char *file, struct openflags *openflags, int shared,
602 char **backing_file_out, int *bitmap_offset_out,
603 unsigned long *bitmap_len_out, int *data_offset_out,
604 int *create_cow_out)
605{
853bc0ab 606 time64_t mtime;
5dc62b1b
WC
607 unsigned long long size;
608 __u32 version, align;
609 char *backing_file;
610 int fd, err, sectorsize, asked_switch, mode = 0644;
611
612 fd = os_open_file(file, *openflags, mode);
613 if (fd < 0) {
614 if ((fd == -ENOENT) && (create_cow_out != NULL))
615 *create_cow_out = 1;
616 if (!openflags->w ||
617 ((fd != -EROFS) && (fd != -EACCES)))
618 return fd;
619 openflags->w = 0;
620 fd = os_open_file(file, *openflags, mode);
621 if (fd < 0)
622 return fd;
623 }
624
625 if (shared)
626 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
627 else {
628 err = os_lock_file(fd, openflags->w);
629 if (err < 0) {
630 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
631 file, -err);
632 goto out_close;
633 }
634 }
635
636 /* Successful return case! */
637 if (backing_file_out == NULL)
638 return fd;
639
640 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
641 &size, &sectorsize, &align, bitmap_offset_out);
642 if (err && (*backing_file_out != NULL)) {
643 printk(KERN_ERR "Failed to read COW header from COW file "
644 "\"%s\", errno = %d\n", file, -err);
645 goto out_close;
646 }
647 if (err)
648 return fd;
649
650 asked_switch = path_requires_switch(*backing_file_out, backing_file,
651 file);
652
653 /* Allow switching only if no mismatch. */
654 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
655 mtime)) {
656 printk(KERN_ERR "Switching backing file to '%s'\n",
657 *backing_file_out);
658 err = write_cow_header(file, fd, *backing_file_out,
659 sectorsize, align, &size);
660 if (err) {
661 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
662 goto out_close;
663 }
664 } else {
665 *backing_file_out = backing_file;
666 err = backing_file_mismatch(*backing_file_out, size, mtime);
667 if (err)
668 goto out_close;
669 }
670
671 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
672 bitmap_len_out, data_offset_out);
673
674 return fd;
675 out_close:
676 os_close_file(fd);
677 return err;
678}
679
680static int create_cow_file(char *cow_file, char *backing_file,
681 struct openflags flags,
682 int sectorsize, int alignment, int *bitmap_offset_out,
683 unsigned long *bitmap_len_out, int *data_offset_out)
684{
685 int err, fd;
686
687 flags.c = 1;
688 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
689 if (fd < 0) {
690 err = fd;
691 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
692 cow_file, -err);
693 goto out;
694 }
695
696 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
697 bitmap_offset_out, bitmap_len_out,
698 data_offset_out);
699 if (!err)
700 return fd;
701 os_close_file(fd);
702 out:
703 return err;
704}
705
5f75a4f8 706static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 707{
7d314e34
PBG
708 os_close_file(ubd_dev->fd);
709 if(ubd_dev->cow.file == NULL)
1da177e4
LT
710 return;
711
7d314e34
PBG
712 os_close_file(ubd_dev->cow.fd);
713 vfree(ubd_dev->cow.bitmap);
714 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
715}
716
7d314e34 717static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
718{
719 struct openflags flags;
720 char **back_ptr;
721 int err, create_cow, *create_ptr;
0bf16bff 722 int fd;
1da177e4 723
7d314e34 724 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 725 create_cow = 0;
7d314e34
PBG
726 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
727 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
728
729 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
730 back_ptr, &ubd_dev->cow.bitmap_offset,
731 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 732 create_ptr);
1da177e4 733
0bf16bff
PBG
734 if((fd == -ENOENT) && create_cow){
735 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
a43c8316 736 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
7d314e34
PBG
737 &ubd_dev->cow.bitmap_offset,
738 &ubd_dev->cow.bitmap_len,
739 &ubd_dev->cow.data_offset);
0bf16bff 740 if(fd >= 0){
1da177e4 741 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 742 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
743 }
744 }
745
0bf16bff 746 if(fd < 0){
7d314e34 747 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
748 -fd);
749 return fd;
1da177e4 750 }
0bf16bff 751 ubd_dev->fd = fd;
1da177e4 752
7d314e34 753 if(ubd_dev->cow.file != NULL){
1da177e4 754 err = -ENOMEM;
da2486ba 755 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 756 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
757 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
758 goto error;
759 }
1da177e4 760
7d314e34
PBG
761 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
762 ubd_dev->cow.bitmap_offset,
763 ubd_dev->cow.bitmap_len);
1da177e4
LT
764 if(err < 0)
765 goto error;
766
7d314e34 767 flags = ubd_dev->openflags;
1da177e4 768 flags.w = 0;
7d314e34 769 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 770 NULL, NULL, NULL, NULL);
1da177e4 771 if(err < 0) goto error;
7d314e34 772 ubd_dev->cow.fd = err;
1da177e4 773 }
dc764e50 774 return 0;
1da177e4 775 error:
7d314e34 776 os_close_file(ubd_dev->fd);
dc764e50 777 return err;
1da177e4
LT
778}
779
2e3f5251
JD
780static void ubd_device_release(struct device *dev)
781{
8691b97b 782 struct ubd *ubd_dev = dev_get_drvdata(dev);
2e3f5251 783
4e6da0fe 784 blk_mq_free_tag_set(&ubd_dev->tag_set);
2e3f5251
JD
785 *ubd_dev = ((struct ubd) DEFAULT_UBD);
786}
787
ef3ba87c
CO
788static ssize_t serial_show(struct device *dev,
789 struct device_attribute *attr, char *buf)
790{
791 struct gendisk *disk = dev_to_disk(dev);
792 struct ubd *ubd_dev = disk->private_data;
793
794 if (!ubd_dev)
795 return 0;
796
797 return sprintf(buf, "%s", ubd_dev->serial);
798}
799
800static DEVICE_ATTR_RO(serial);
801
802static struct attribute *ubd_attrs[] = {
803 &dev_attr_serial.attr,
804 NULL,
805};
806
807static umode_t ubd_attrs_are_visible(struct kobject *kobj,
808 struct attribute *a, int n)
809{
810 return a->mode;
811}
812
813static const struct attribute_group ubd_attr_group = {
814 .attrs = ubd_attrs,
815 .is_visible = ubd_attrs_are_visible,
816};
817
818static const struct attribute_group *ubd_attr_groups[] = {
819 &ubd_attr_group,
820 NULL,
821};
822
/* Round n up to the next multiple of SECTOR_SIZE (a power of two). */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
1da177e4 824
4e6da0fe
RW
825static const struct blk_mq_ops ubd_mq_ops = {
826 .queue_rq = ubd_queue_rq,
4e6da0fe
RW
827};
828
f28169d2 829static int ubd_add(int n, char **error_out)
1da177e4 830{
7d314e34 831 struct ubd *ubd_dev = &ubd_devs[n];
5d6789ce
CH
832 struct queue_limits lim = {
833 .max_segments = MAX_SG,
834 .seg_boundary_mask = PAGE_SIZE - 1,
1122c0c1 835 .features = BLK_FEAT_WRITE_CACHE,
5d6789ce 836 };
35efb594 837 struct gendisk *disk;
f28169d2 838 int err = 0;
1da177e4 839
7d314e34 840 if(ubd_dev->file == NULL)
ec7cf783 841 goto out;
1da177e4 842
58ebe3e7
CH
843 if (ubd_dev->cow.file)
844 lim.max_hw_sectors = 8 * sizeof(long);
845 if (!ubd_dev->no_trim) {
846 lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
847 lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
848 }
849
7d314e34 850 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
851 if(err < 0){
852 *error_out = "Couldn't determine size of device's file";
80c13749 853 goto out;
f28169d2 854 }
1da177e4 855
fb5d1d38
CH
856 err = ubd_open_dev(ubd_dev);
857 if (err) {
858 pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
859 'a' + n, ubd_dev->file, -err);
860 goto out;
861 }
862
7d314e34 863 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 864
4e6da0fe
RW
865 ubd_dev->tag_set.ops = &ubd_mq_ops;
866 ubd_dev->tag_set.queue_depth = 64;
867 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
868 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
4e6da0fe
RW
869 ubd_dev->tag_set.driver_data = ubd_dev;
870 ubd_dev->tag_set.nr_hw_queues = 1;
a0044bdf 871
4e6da0fe
RW
872 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
873 if (err)
fb5d1d38 874 goto out_close;
4e6da0fe 875
5d6789ce 876 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
35efb594
CH
877 if (IS_ERR(disk)) {
878 err = PTR_ERR(disk);
aea05eb5 879 goto out_cleanup_tags;
62f96cb0 880 }
4e6da0fe 881
0267e9ca
CH
882 disk->major = UBD_MAJOR;
883 disk->first_minor = n << UBD_SHIFT;
884 disk->minors = 1 << UBD_SHIFT;
885 disk->fops = &ubd_blops;
886 set_capacity(disk, ubd_dev->size / 512);
887 sprintf(disk->disk_name, "ubd%c", 'a' + n);
888 disk->private_data = ubd_dev;
5e4e1ff8 889 set_disk_ro(disk, !ubd_dev->openflags.w);
0267e9ca
CH
890
891 ubd_dev->pdev.id = n;
892 ubd_dev->pdev.name = DRIVER_NAME;
893 ubd_dev->pdev.dev.release = ubd_device_release;
894 dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
895 platform_device_register(&ubd_dev->pdev);
896
897 err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
66638f16
LC
898 if (err)
899 goto out_cleanup_disk;
900
35efb594 901 return 0;
62f96cb0 902
66638f16 903out_cleanup_disk:
8b9ab626 904 put_disk(disk);
4e6da0fe
RW
905out_cleanup_tags:
906 blk_mq_free_tag_set(&ubd_dev->tag_set);
fb5d1d38
CH
907out_close:
908 ubd_close_dev(ubd_dev);
35efb594
CH
909out:
910 return err;
1da177e4
LT
911}
912
f28169d2 913static int ubd_config(char *str, char **error_out)
1da177e4 914{
e7f6552f 915 int n, ret;
1da177e4 916
f28169d2
JD
917 /* This string is possibly broken up and stored, so it's only
918 * freed if ubd_setup_common fails, or if only general options
919 * were set.
920 */
970d6e3a 921 str = kstrdup(str, GFP_KERNEL);
e7f6552f 922 if (str == NULL) {
f28169d2
JD
923 *error_out = "Failed to allocate memory";
924 return -ENOMEM;
1da177e4 925 }
f28169d2
JD
926
927 ret = ubd_setup_common(str, &n, error_out);
928 if (ret)
e7f6552f 929 goto err_free;
f28169d2 930
e7f6552f
PBG
931 if (n == -1) {
932 ret = 0;
d8d7c28e 933 goto err_free;
1da177e4 934 }
1da177e4 935
dc764e50 936 mutex_lock(&ubd_lock);
f28169d2 937 ret = ubd_add(n, error_out);
e7f6552f 938 if (ret)
7d314e34 939 ubd_devs[n].file = NULL;
dc764e50 940 mutex_unlock(&ubd_lock);
1da177e4 941
e7f6552f 942out:
dc764e50 943 return ret;
e7f6552f
PBG
944
945err_free:
946 kfree(str);
947 goto out;
1da177e4
LT
948}
949
950static int ubd_get_config(char *name, char *str, int size, char **error_out)
951{
7d314e34 952 struct ubd *ubd_dev;
1da177e4
LT
953 int n, len = 0;
954
955 n = parse_unit(&name);
956 if((n >= MAX_DEV) || (n < 0)){
957 *error_out = "ubd_get_config : device number out of range";
dc764e50 958 return -1;
1da177e4
LT
959 }
960
7d314e34 961 ubd_dev = &ubd_devs[n];
d7fb2c38 962 mutex_lock(&ubd_lock);
1da177e4 963
7d314e34 964 if(ubd_dev->file == NULL){
1da177e4
LT
965 CONFIG_CHUNK(str, size, len, "", 1);
966 goto out;
967 }
968
7d314e34 969 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 970
7d314e34 971 if(ubd_dev->cow.file != NULL){
1da177e4 972 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 973 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
974 }
975 else CONFIG_CHUNK(str, size, len, "", 1);
976
977 out:
d7fb2c38 978 mutex_unlock(&ubd_lock);
dc764e50 979 return len;
1da177e4
LT
980}
981
29d56cfe
JD
982static int ubd_id(char **str, int *start_out, int *end_out)
983{
dc764e50 984 int n;
29d56cfe
JD
985
986 n = parse_unit(str);
dc764e50
JD
987 *start_out = 0;
988 *end_out = MAX_DEV - 1;
989 return n;
29d56cfe
JD
990}
991
f28169d2 992static int ubd_remove(int n, char **error_out)
1da177e4 993{
7d314e34 994 struct ubd *ubd_dev;
29d56cfe 995 int err = -ENODEV;
1da177e4 996
d7fb2c38 997 mutex_lock(&ubd_lock);
1da177e4 998
7d314e34 999 ubd_dev = &ubd_devs[n];
1da177e4 1000
7d314e34 1001 if(ubd_dev->file == NULL)
29d56cfe 1002 goto out;
1da177e4 1003
32621ad7 1004 if (ubd_dev->disk) {
fb5d1d38
CH
1005 /* you cannot remove a open disk */
1006 err = -EBUSY;
1007 if (disk_openers(ubd_dev->disk))
1008 goto out;
1009
32621ad7 1010 del_gendisk(ubd_dev->disk);
fb5d1d38 1011 ubd_close_dev(ubd_dev);
32621ad7 1012 put_disk(ubd_dev->disk);
1da177e4
LT
1013 }
1014
1da177e4 1015 err = 0;
2e3f5251 1016 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1017out:
d7fb2c38 1018 mutex_unlock(&ubd_lock);
29d56cfe 1019 return err;
1da177e4
LT
1020}
1021
f28169d2 1022/* All these are called by mconsole in process context and without
b8831a1d 1023 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1024 */
1da177e4 1025static struct mc_device ubd_mc = {
84f48d4f 1026 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1027 .name = "ubd",
1028 .config = ubd_config,
dc764e50 1029 .get_config = ubd_get_config,
29d56cfe 1030 .id = ubd_id,
1da177e4
LT
1031 .remove = ubd_remove,
1032};
1033
d8d7c28e 1034static int __init ubd_mc_init(void)
1da177e4
LT
1035{
1036 mconsole_register_dev(&ubd_mc);
1037 return 0;
1038}
1039
1040__initcall(ubd_mc_init);
1041
d8d7c28e
PBG
1042static int __init ubd0_init(void)
1043{
1044 struct ubd *ubd_dev = &ubd_devs[0];
1045
b8831a1d 1046 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1047 if(ubd_dev->file == NULL)
1048 ubd_dev->file = "root_fs";
b8831a1d
JD
1049 mutex_unlock(&ubd_lock);
1050
dc764e50 1051 return 0;
d8d7c28e
PBG
1052}
1053
1054__initcall(ubd0_init);
1055
b8831a1d 1056/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1057static struct platform_driver ubd_driver = {
1058 .driver = {
1059 .name = DRIVER_NAME,
1060 },
1da177e4
LT
1061};
1062
d8d7c28e 1063static int __init ubd_init(void)
1da177e4 1064{
f28169d2
JD
1065 char *error;
1066 int i, err;
1da177e4 1067
792dd4fc 1068 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1069 return -1;
1070
6da2ec56
KC
1071 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1072 sizeof(struct io_thread_req *),
1073 GFP_KERNEL
f88f0bdf
AI
1074 );
1075 irq_remainder = 0;
1076
1077 if (irq_req_buffer == NULL) {
1078 printk(KERN_ERR "Failed to initialize ubd buffering\n");
31a5990e 1079 return -ENOMEM;
f88f0bdf 1080 }
6da2ec56
KC
1081 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1082 sizeof(struct io_thread_req *),
1083 GFP_KERNEL
f88f0bdf
AI
1084 );
1085
1086 io_remainder = 0;
1087
1088 if (io_req_buffer == NULL) {
1089 printk(KERN_ERR "Failed to initialize ubd buffering\n");
31a5990e 1090 return -ENOMEM;
f88f0bdf 1091 }
3ae5eaec 1092 platform_driver_register(&ubd_driver);
dc764e50 1093 mutex_lock(&ubd_lock);
f28169d2
JD
1094 for (i = 0; i < MAX_DEV; i++){
1095 err = ubd_add(i, &error);
1096 if(err)
1097 printk(KERN_ERR "Failed to initialize ubd device %d :"
1098 "%s\n", i, error);
1099 }
dc764e50 1100 mutex_unlock(&ubd_lock);
1da177e4
LT
1101 return 0;
1102}
1103
1104late_initcall(ubd_init);
1105
d8d7c28e 1106static int __init ubd_driver_init(void){
91acb21f
JD
1107 unsigned long stack;
1108 int err;
1109
1110 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1111 if(global_openflags.s){
1112 printk(KERN_INFO "ubd: Synchronous mode\n");
1113 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1114 * enough. So use anyway the io thread. */
1115 }
2fcb4090 1116 stack = alloc_stack(0, 0);
558f9b2f 1117 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
91acb21f 1118 if(io_pid < 0){
6c29256c 1119 printk(KERN_ERR
91acb21f
JD
1120 "ubd : Failed to start I/O thread (errno = %d) - "
1121 "falling back to synchronous I/O\n", -io_pid);
1122 io_pid = -1;
dc764e50 1123 return 0;
91acb21f 1124 }
6c29256c 1125 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1126 0, "ubd", ubd_devs);
36d46a59 1127 if(err < 0)
91acb21f 1128 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1129 return 0;
91acb21f
JD
1130}
1131
1132device_initcall(ubd_driver_init);
1133
91acb21f
JD
1134static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1135 __u64 *cow_offset, unsigned long *bitmap,
1136 __u64 bitmap_offset, unsigned long *bitmap_words,
1137 __u64 bitmap_len)
1da177e4 1138{
a43c8316 1139 __u64 sector = io_offset >> SECTOR_SHIFT;
91acb21f
JD
1140 int i, update_bitmap = 0;
1141
a43c8316 1142 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
91acb21f
JD
1143 if(cow_mask != NULL)
1144 ubd_set_bit(i, (unsigned char *) cow_mask);
1145 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1146 continue;
1da177e4 1147
91acb21f
JD
1148 update_bitmap = 1;
1149 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1150 }
1151
1152 if(!update_bitmap)
1153 return;
1da177e4 1154
91acb21f 1155 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1156
91acb21f
JD
1157 /* This takes care of the case where we're exactly at the end of the
1158 * device, and *cow_offset + 1 is off the end. So, just back it up
1159 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1160 * for the original diagnosis.
1161 */
6d074242
JO
1162 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1163 sizeof(unsigned long)) - 1))
91acb21f
JD
1164 (*cow_offset)--;
1165
1166 bitmap_words[0] = bitmap[*cow_offset];
1167 bitmap_words[1] = bitmap[*cow_offset + 1];
1168
1169 *cow_offset *= sizeof(unsigned long);
1170 *cow_offset += bitmap_offset;
1171}
1172
fc6b6a87
GKB
1173static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1174 unsigned long offset, unsigned long *bitmap,
91acb21f
JD
1175 __u64 bitmap_offset, __u64 bitmap_len)
1176{
fc6b6a87 1177 __u64 sector = offset >> SECTOR_SHIFT;
91acb21f
JD
1178 int i;
1179
fc6b6a87 1180 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
91acb21f
JD
1181 panic("Operation too long");
1182
a43c8316 1183 if (req_op(req->req) == REQ_OP_READ) {
fc6b6a87 1184 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
91acb21f 1185 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1186 ubd_set_bit(i, (unsigned char *)
fc6b6a87
GKB
1187 &segment->sector_mask);
1188 }
1189 } else {
1190 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1191 &segment->cow_offset, bitmap, bitmap_offset,
1192 segment->bitmap_words, bitmap_len);
1193 }
1194}
1195
1196static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1197 struct request *req)
1198{
1199 struct bio_vec bvec;
1200 struct req_iterator iter;
1201 int i = 0;
1202 unsigned long byte_offset = io_req->offset;
7ee1de6e 1203 enum req_op op = req_op(req);
fc6b6a87
GKB
1204
1205 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1206 io_req->io_desc[0].buffer = NULL;
1207 io_req->io_desc[0].length = blk_rq_bytes(req);
1208 } else {
1209 rq_for_each_segment(bvec, req, iter) {
1210 BUG_ON(i >= io_req->desc_cnt);
1211
25d84545 1212 io_req->io_desc[i].buffer = bvec_virt(&bvec);
fc6b6a87
GKB
1213 io_req->io_desc[i].length = bvec.bv_len;
1214 i++;
1215 }
1216 }
1217
1218 if (dev->cow.file) {
1219 for (i = 0; i < io_req->desc_cnt; i++) {
1220 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1221 dev->cow.bitmap, dev->cow.bitmap_offset,
1222 dev->cow.bitmap_len);
1223 byte_offset += io_req->io_desc[i].length;
dc764e50 1224 }
fc6b6a87 1225
91acb21f 1226 }
1da177e4
LT
1227}
1228
fc6b6a87
GKB
1229static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1230 int desc_cnt)
1da177e4 1231{
ecb0a83e 1232 struct io_thread_req *io_req;
fc6b6a87 1233 int i;
1da177e4 1234
fc6b6a87
GKB
1235 io_req = kmalloc(sizeof(*io_req) +
1236 (desc_cnt * sizeof(struct io_desc)),
1237 GFP_ATOMIC);
ecb0a83e 1238 if (!io_req)
fc6b6a87 1239 return NULL;
805f11a0
RW
1240
1241 io_req->req = req;
ecb0a83e
CH
1242 if (dev->cow.file)
1243 io_req->fds[0] = dev->cow.fd;
1244 else
1245 io_req->fds[0] = dev->fd;
0033dfd9 1246 io_req->error = 0;
53766def
AI
1247 io_req->sectorsize = SECTOR_SIZE;
1248 io_req->fds[1] = dev->fd;
fc6b6a87 1249 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
53766def
AI
1250 io_req->offsets[0] = 0;
1251 io_req->offsets[1] = dev->cow.data_offset;
1252
fc6b6a87
GKB
1253 for (i = 0 ; i < desc_cnt; i++) {
1254 io_req->io_desc[i].sector_mask = 0;
1255 io_req->io_desc[i].cow_offset = -1;
1256 }
1257
1258 return io_req;
1259}
1260
1261static int ubd_submit_request(struct ubd *dev, struct request *req)
1262{
1263 int segs = 0;
1264 struct io_thread_req *io_req;
1265 int ret;
7ee1de6e 1266 enum req_op op = req_op(req);
fc6b6a87
GKB
1267
1268 if (op == REQ_OP_FLUSH)
1269 segs = 0;
1270 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1271 segs = 1;
1272 else
1273 segs = blk_rq_nr_phys_segments(req);
1274
1275 io_req = ubd_alloc_req(dev, req, segs);
1276 if (!io_req)
1277 return -ENOMEM;
1278
1279 io_req->desc_cnt = segs;
1280 if (segs)
1281 ubd_map_req(dev, io_req, req);
53766def 1282
ecb0a83e
CH
1283 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1284 if (ret != sizeof(io_req)) {
1285 if (ret != -EAGAIN)
1286 pr_err("write to io thread failed: %d\n", -ret);
bc1d72e7 1287 kfree(io_req);
bc1d72e7 1288 }
ecb0a83e 1289 return ret;
bc1d72e7
RW
1290}
1291
4e6da0fe
RW
1292static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1293 const struct blk_mq_queue_data *bd)
1da177e4 1294{
6961cd4d 1295 struct ubd *ubd_dev = hctx->queue->queuedata;
4e6da0fe 1296 struct request *req = bd->rq;
53766def 1297 int ret = 0, res = BLK_STS_OK;
a0044bdf 1298
4e6da0fe
RW
1299 blk_mq_start_request(req);
1300
6961cd4d
JA
1301 spin_lock_irq(&ubd_dev->lock);
1302
53766def 1303 switch (req_op(req)) {
53766def 1304 case REQ_OP_FLUSH:
53766def
AI
1305 case REQ_OP_READ:
1306 case REQ_OP_WRITE:
50109b5a
AI
1307 case REQ_OP_DISCARD:
1308 case REQ_OP_WRITE_ZEROES:
fc6b6a87 1309 ret = ubd_submit_request(ubd_dev, req);
50109b5a 1310 break;
53766def
AI
1311 default:
1312 WARN_ON_ONCE(1);
1313 res = BLK_STS_NOTSUPP;
4e6da0fe 1314 }
53766def 1315
6961cd4d
JA
1316 spin_unlock_irq(&ubd_dev->lock);
1317
d848074b
AI
1318 if (ret < 0) {
1319 if (ret == -ENOMEM)
1320 res = BLK_STS_RESOURCE;
1321 else
1322 res = BLK_STS_DEV_RESOURCE;
1323 }
6961cd4d 1324
53766def 1325 return res;
1da177e4
LT
1326}
1327
a885c8c4
CH
1328static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1329{
7d314e34 1330 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1331
1332 geo->heads = 128;
1333 geo->sectors = 32;
7d314e34 1334 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1335 return 0;
1336}
1337
05bdb996 1338static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4
LT
1339 unsigned int cmd, unsigned long arg)
1340{
a625c998 1341 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1342 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1343
1344 switch (cmd) {
1da177e4 1345 struct cdrom_volctrl volume;
1da177e4 1346 case HDIO_GET_IDENTITY:
73855e13
BZ
1347 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1348 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1349 ubd_id[ATA_ID_HEADS] = 128;
1350 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1351 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1352 sizeof(ubd_id)))
dc764e50
JD
1353 return -EFAULT;
1354 return 0;
b8831a1d 1355
1da177e4
LT
1356 case CDROMVOLREAD:
1357 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1358 return -EFAULT;
1da177e4
LT
1359 volume.channel0 = 255;
1360 volume.channel1 = 255;
1361 volume.channel2 = 255;
1362 volume.channel3 = 255;
1363 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1364 return -EFAULT;
1365 return 0;
1da177e4 1366 }
dc764e50 1367 return -EINVAL;
1da177e4
LT
1368}
1369
a43c8316
AI
1370static int map_error(int error_code)
1371{
1372 switch (error_code) {
1373 case 0:
1374 return BLK_STS_OK;
1375 case ENOSYS:
1376 case EOPNOTSUPP:
1377 return BLK_STS_NOTSUPP;
1378 case ENOSPC:
1379 return BLK_STS_NOSPC;
1380 }
1381 return BLK_STS_IOERR;
1382}
1383
a41421ed
AI
1384/*
1385 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1386 *
1387 * The following functions are part of UML hypervisor code.
1388 * All functions from here onwards are executed as a helper
1389 * thread and are not allowed to execute any kernel functions.
1390 *
1391 * Any communication must occur strictly via shared memory and IPC.
1392 *
1393 * Do not add printks, locks, kernel memory operations, etc - it
1394 * will result in unpredictable behaviour and/or crashes.
1395 */
1396
fc6b6a87 1397static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1da177e4 1398{
91acb21f 1399 int n;
1da177e4 1400
fc6b6a87 1401 if (segment->cow_offset == -1)
a43c8316 1402 return map_error(0);
1da177e4 1403
fc6b6a87
GKB
1404 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1405 sizeof(segment->bitmap_words), segment->cow_offset);
1406 if (n != sizeof(segment->bitmap_words))
a43c8316 1407 return map_error(-n);
1da177e4 1408
a43c8316 1409 return map_error(0);
91acb21f 1410}
1da177e4 1411
fc6b6a87 1412static void do_io(struct io_thread_req *req, struct io_desc *desc)
91acb21f 1413{
50109b5a 1414 char *buf = NULL;
91acb21f
JD
1415 unsigned long len;
1416 int n, nsectors, start, end, bit;
91acb21f
JD
1417 __u64 off;
1418
50109b5a
AI
1419 /* FLUSH is really a special case, we cannot "case" it with others */
1420
a43c8316 1421 if (req_op(req->req) == REQ_OP_FLUSH) {
805f11a0 1422 /* fds[0] is always either the rw image or our cow file */
a41421ed 1423 req->error = map_error(-os_sync_file(req->fds[0]));
805f11a0
RW
1424 return;
1425 }
1426
fc6b6a87 1427 nsectors = desc->length / req->sectorsize;
91acb21f
JD
1428 start = 0;
1429 do {
fc6b6a87 1430 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
91acb21f
JD
1431 end = start;
1432 while((end < nsectors) &&
fc6b6a87 1433 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
91acb21f
JD
1434 end++;
1435
1436 off = req->offset + req->offsets[bit] +
1437 start * req->sectorsize;
1438 len = (end - start) * req->sectorsize;
fc6b6a87
GKB
1439 if (desc->buffer != NULL)
1440 buf = &desc->buffer[start * req->sectorsize];
91acb21f 1441
50109b5a
AI
1442 switch (req_op(req->req)) {
1443 case REQ_OP_READ:
91acb21f
JD
1444 n = 0;
1445 do {
1446 buf = &buf[n];
1447 len -= n;
8c6157b6 1448 n = os_pread_file(req->fds[bit], buf, len, off);
50109b5a 1449 if (n < 0) {
a43c8316 1450 req->error = map_error(-n);
91acb21f
JD
1451 return;
1452 }
1453 } while((n < len) && (n != 0));
1454 if (n < len) memset(&buf[n], 0, len - n);
50109b5a
AI
1455 break;
1456 case REQ_OP_WRITE:
8c6157b6 1457 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f 1458 if(n != len){
a43c8316 1459 req->error = map_error(-n);
91acb21f
JD
1460 return;
1461 }
50109b5a
AI
1462 break;
1463 case REQ_OP_DISCARD:
50109b5a
AI
1464 n = os_falloc_punch(req->fds[bit], off, len);
1465 if (n) {
1466 req->error = map_error(-n);
1467 return;
1468 }
1469 break;
d2a0a616
FD
1470 case REQ_OP_WRITE_ZEROES:
1471 n = os_falloc_zeroes(req->fds[bit], off, len);
1472 if (n) {
1473 req->error = map_error(-n);
1474 return;
1475 }
1476 break;
50109b5a
AI
1477 default:
1478 WARN_ON_ONCE(1);
1479 req->error = BLK_STS_NOTSUPP;
1480 return;
91acb21f
JD
1481 }
1482
1483 start = end;
1484 } while(start < nsectors);
1da177e4 1485
fc6b6a87
GKB
1486 req->offset += len;
1487 req->error = update_bitmap(req, desc);
1da177e4 1488}
91acb21f
JD
1489
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
1493int kernel_fd = -1;
1494
d8d7c28e 1495/* Only changed by the io thread. XXX: currently unused. */
4dc5a328 1496static int io_count;
91acb21f
JD
1497
1498int io_thread(void *arg)
1499{
f88f0bdf 1500 int n, count, written, res;
91acb21f 1501
91d44ff8
RW
1502 os_fix_helper_signals();
1503
91acb21f 1504 while(1){
f88f0bdf
AI
1505 n = bulk_req_safe_read(
1506 kernel_fd,
1507 io_req_buffer,
1508 &io_remainder,
1509 &io_remainder_size,
1510 UBD_REQ_BUFFER_SIZE
1511 );
e355b2f5
GKB
1512 if (n <= 0) {
1513 if (n == -EAGAIN)
f88f0bdf 1514 ubd_read_poll(-1);
e355b2f5
GKB
1515
1516 continue;
91acb21f 1517 }
f88f0bdf
AI
1518
1519 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
fc6b6a87
GKB
1520 struct io_thread_req *req = (*io_req_buffer)[count];
1521 int i;
1522
f88f0bdf 1523 io_count++;
fc6b6a87
GKB
1524 for (i = 0; !req->error && i < req->desc_cnt; i++)
1525 do_io(req, &(req->io_desc[i]));
1526
f88f0bdf
AI
1527 }
1528
1529 written = 0;
1530
1531 do {
6e682d53
GKB
1532 res = os_write_file(kernel_fd,
1533 ((char *) io_req_buffer) + written,
1534 n - written);
ff6a1798 1535 if (res >= 0) {
f88f0bdf 1536 written += res;
f88f0bdf
AI
1537 }
1538 if (written < n) {
1539 ubd_write_poll(-1);
1540 }
1541 } while (written < n);
91acb21f 1542 }
91acb21f 1543
1b57e9c2
JD
1544 return 0;
1545}