Merge tag 'pci-v6.16-fixes-3' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci
[linux-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
dbddf429 1// SPDX-License-Identifier: GPL-2.0
6c29256c 2/*
a41421ed 3 * Copyright (C) 2018 Cambridge Greys Ltd
f88f0bdf 4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
1da177e4 5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
1da177e4
LT
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
1da177e4
LT
22#define UBD_SHIFT 4
23
8ea3c06a
AV
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
4e6da0fe 27#include <linux/blk-mq.h>
8ea3c06a
AV
28#include <linux/ata.h>
29#include <linux/hdreg.h>
b81e0c23 30#include <linux/major.h>
8ea3c06a
AV
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
37185b33 39#include <kern_util.h>
1da177e4 40#include "mconsole_kern.h"
37185b33
AV
41#include <init.h>
42#include <irq_kern.h>
8ea3c06a 43#include "ubd.h"
37185b33 44#include <os.h>
1da177e4
LT
45#include "cow.h"
46
a43c8316
AI
47/* Max request size is determined by sector mask - 32K */
48#define UBD_MAX_REQUEST (8 * sizeof(long))
1da177e4 49
fc6b6a87
GKB
/*
 * One data segment of an I/O request.  An array of these trails
 * struct io_thread_req.
 *
 * NOTE(review): the code that fills and consumes these fields is outside
 * this view; the per-field notes below are inferred from names only.
 */
struct io_desc {
	char *buffer;			/* presumably the data buffer for this segment */
	unsigned long length;		/* presumably the buffer length in bytes */
	unsigned long sector_mask;	/* presumably one bit per sector - TODO confirm */
	unsigned long long cow_offset;	/* presumably an offset into the COW bitmap area */
	unsigned long bitmap_words[2];	/* presumably bitmap words to write back */
};
57
/*
 * A request as exchanged by pointer over the I/O thread file descriptor.
 * ubd_intr() reads completed pointers back and ubd_end_request() finishes
 * the block request with ->error and then kfree()s the structure.
 */
struct io_thread_req {
	struct request *req;		/* block layer request being served */
	int fds[2];			/* NOTE(review): presumably ubd fd and backing fd */
	unsigned long offsets[2];	/* NOTE(review): presumably data offsets per fd */
	unsigned long long offset;
	int sectorsize;
	int error;			/* completion status handed to blk_mq_end_request() */

	int desc_cnt;			/* NOTE(review): presumably entries in io_desc[] */
	/* io_desc has to be the last element of the struct */
	struct io_desc io_desc[];
};
70
f88f0bdf
AI
71
/*
 * Interrupt-side receive state.  irq_req_buffer is an array of
 * UBD_REQ_BUFFER_SIZE request pointers allocated in ubd_init() and filled
 * by bulk_req_safe_read() from ubd_intr().  The storage of the
 * irq_remainder pointer variable itself holds the trailing bytes of a
 * partially read pointer, and irq_remainder_size counts those bytes.
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * Same layout as above, also allocated in ubd_init().
 * NOTE(review): presumably used by the I/O thread side; the user is
 * outside this view.
 */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
79
80
81
/*
 * Test bit number @bit in the byte-addressed bitmap @data.  Bits are
 * numbered from the least significant bit of each byte.
 * Returns 1 when the bit is set and 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int cell_bits = sizeof(data[0]) * 8;
	__u64 cell = bit / cell_bits;
	int shift = bit % cell_bits;

	return (data[cell] >> shift) & 1;
}
92
/*
 * Set bit number @bit in the byte-addressed bitmap @data, using the same
 * LSB-first numbering within each byte as ubd_test_bit().
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int cell_bits = sizeof(data[0]) * 8;

	data[bit / cell_bits] |= (1 << (bit % cell_bits));
}
103/*End stuff from ubd_user.h*/
104
105#define DRIVER_NAME "uml-blkdev"
106
d7fb2c38 107static DEFINE_MUTEX(ubd_lock);
1da177e4 108
05bdb996 109static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4 110 unsigned int cmd, unsigned long arg);
a885c8c4 111static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 112
97d88ac8 113#define MAX_DEV (16)
1da177e4 114
83d5cde4 115static const struct block_device_operations ubd_blops = {
1da177e4 116 .owner = THIS_MODULE,
a625c998 117 .ioctl = ubd_ioctl,
ab0cf1e4 118 .compat_ioctl = blkdev_compat_ptr_ioctl,
a885c8c4 119 .getgeo = ubd_getgeo,
1da177e4
LT
120};
121
1da177e4
LT
122#ifdef CONFIG_BLK_DEV_UBD_SYNC
123#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124 .cl = 1 })
125#else
126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127 .cl = 1 })
128#endif
1da177e4
LT
129static struct openflags global_openflags = OPEN_FLAGS;
130
/* Copy-on-write state of one ubd device; unused when cow.file is NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap: vmalloc'ed and read from the COW
	 * file in ubd_open_dev(), vfree'd in ubd_close_dev() */
	unsigned long *bitmap;
	/* size of the bitmap in bytes, as passed to vmalloc() */
	unsigned long bitmap_len;
	/* file offset of the bitmap within the COW file */
	int bitmap_offset;
	/* NOTE(review): presumably the file offset where COW data starts */
	int data_offset;
};
141
a0044bdf
JD
142#define MAX_SG 64
143
/* Per-device state; one slot of ubd_devs[] per unit number. */
struct ubd {
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* optional serial string from the option line, shown by the "serial"
	 * sysfs attribute; NULL when none was given */
	char *serial;
	int fd;
	/* size in bytes, rounded up with ROUND_BLOCK() in ubd_add() */
	__u64 size;
	/* open flags parsed from the option string */
	struct openflags boot_openflags;
	/* flags actually used; copied from boot_openflags in ubd_open_dev()
	 * and possibly downgraded to read-only by open_ubd_file() */
	struct openflags openflags;
	unsigned shared:1;	/* 'c' flag: do not lock the file on the host */
	unsigned no_cow:1;	/* 'd' flag: never treat the file as a COW file */
	unsigned no_trim:1;	/* 't' flag: no discard/write-zeroes limits */
	struct cow cow;
	struct platform_device pdev;
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
	spinlock_t lock;
};
162
1da177e4
LT
/* Initializer for an unused struct cow: no file, invalid fd, no bitmap. */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/*
 * Initializer for an unconfigured device slot.  A NULL .file is what the
 * rest of the driver tests to decide that a slot is free.
 */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.serial =		NULL, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =		0, \
	.no_trim =		0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}

/* Protected by ubd_lock */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 187
4e6da0fe
RW
188static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
189 const struct blk_mq_queue_data *bd);
4e6da0fe 190
1da177e4
LT
191static int fake_ide_setup(char *str)
192{
7eb90f7e 193 pr_warn("The fake_ide option has been removed\n");
dc764e50 194 return 1;
1da177e4 195}
1da177e4
LT
196__setup("fake_ide", fake_ide_setup);
197
198__uml_help(fake_ide_setup,
199"fake_ide\n"
7eb90f7e 200" Obsolete stub.\n\n"
1da177e4
LT
201);
202
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul) or a single lower-case letter, where 'a' is unit 0.
 * On success *ptr is advanced past the designator and the unit number is
 * returned; otherwise -1 is returned and *ptr is left untouched.
 * The result is not range-checked against MAX_DEV here.
 */
static int parse_unit(char **ptr)
{
	char *cursor = *ptr;
	char *end;
	int unit;

	if (isdigit(*cursor)) {
		unit = simple_strtoul(cursor, &end, 0);
		if (end == cursor)
			return -1;
		*ptr = end;
		return unit;
	}

	if (*cursor < 'a' || *cursor > 'z')
		return -1;

	*ptr = cursor + 1;
	return *cursor - 'a';
}
221
d8d7c28e
PBG
222/* If *index_out == -1 at exit, the passed option was a general one;
223 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
224 * should not be freed on exit.
225 */
f28169d2 226static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 227{
7d314e34 228 struct ubd *ubd_dev;
1da177e4 229 struct openflags flags = global_openflags;
ef3ba87c 230 char *file, *backing_file, *serial;
b8831a1d 231 int n, err = 0, i;
1da177e4
LT
232
233 if(index_out) *index_out = -1;
234 n = *str;
235 if(n == '='){
1da177e4 236 str++;
1da177e4
LT
237 if(!strcmp(str, "sync")){
238 global_openflags = of_sync(global_openflags);
9ca55299 239 return err;
1da177e4 240 }
b8831a1d 241
7eb90f7e
CH
242 pr_warn("fake major not supported any more\n");
243 return 0;
1da177e4
LT
244 }
245
246 n = parse_unit(&str);
247 if(n < 0){
f28169d2
JD
248 *error_out = "Couldn't parse device number";
249 return -EINVAL;
1da177e4
LT
250 }
251 if(n >= MAX_DEV){
f28169d2
JD
252 *error_out = "Device number out of range";
253 return 1;
1da177e4
LT
254 }
255
f28169d2 256 err = -EBUSY;
d7fb2c38 257 mutex_lock(&ubd_lock);
1da177e4 258
7d314e34
PBG
259 ubd_dev = &ubd_devs[n];
260 if(ubd_dev->file != NULL){
f28169d2 261 *error_out = "Device is already configured";
1da177e4
LT
262 goto out;
263 }
264
265 if (index_out)
266 *index_out = n;
267
f28169d2 268 err = -EINVAL;
50109b5a 269 for (i = 0; i < sizeof("rscdt="); i++) {
1da177e4
LT
270 switch (*str) {
271 case 'r':
272 flags.w = 0;
273 break;
274 case 's':
275 flags.s = 1;
276 break;
277 case 'd':
7d314e34 278 ubd_dev->no_cow = 1;
1da177e4 279 break;
6c29256c 280 case 'c':
7d314e34 281 ubd_dev->shared = 1;
6c29256c 282 break;
50109b5a
AI
283 case 't':
284 ubd_dev->no_trim = 1;
285 break;
1da177e4
LT
286 case '=':
287 str++;
288 goto break_loop;
289 default:
f28169d2 290 *error_out = "Expected '=' or flag letter "
50109b5a 291 "(r, s, c, t or d)";
1da177e4
LT
292 goto out;
293 }
294 str++;
295 }
296
f28169d2
JD
297 if (*str == '=')
298 *error_out = "Too many flags specified";
299 else
300 *error_out = "Missing '='";
1da177e4
LT
301 goto out;
302
303break_loop:
ef3ba87c
CO
304 file = strsep(&str, ",:");
305 if (*file == '\0')
306 file = NULL;
1da177e4 307
ef3ba87c 308 backing_file = strsep(&str, ",:");
94c41b3a 309 if (backing_file && *backing_file == '\0')
ef3ba87c 310 backing_file = NULL;
1da177e4 311
ef3ba87c 312 serial = strsep(&str, ",:");
94c41b3a 313 if (serial && *serial == '\0')
ef3ba87c
CO
314 serial = NULL;
315
316 if (backing_file && ubd_dev->no_cow) {
317 *error_out = "Can't specify both 'd' and a cow file";
318 goto out;
1da177e4 319 }
ef3ba87c 320
f28169d2 321 err = 0;
ef3ba87c 322 ubd_dev->file = file;
7d314e34 323 ubd_dev->cow.file = backing_file;
ef3ba87c 324 ubd_dev->serial = serial;
7d314e34 325 ubd_dev->boot_openflags = flags;
1da177e4 326out:
d7fb2c38 327 mutex_unlock(&ubd_lock);
f28169d2 328 return err;
1da177e4
LT
329}
330
331static int ubd_setup(char *str)
332{
f28169d2
JD
333 char *error;
334 int err;
335
336 err = ubd_setup_common(str, NULL, &error);
337 if(err)
338 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
339 "%s\n", str, error);
340 return 1;
1da177e4
LT
341}
342
343__setup("ubd", ubd_setup);
344__uml_help(ubd_setup,
ef3ba87c 345"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
1da177e4
LT
346" This is used to associate a device with a file in the underlying\n"
347" filesystem. When specifying two filenames, the first one is the\n"
348" COW name and the second is the backing file name. As separator you can\n"
349" use either a ':' or a ',': the first one allows writing things like;\n"
350" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
351" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 352" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
353" a COW file or a backing file. To override this detection, add the 'd'\n"
354" flag:\n"
355" ubd0d=BackingFile\n"
356" Usually, there is a filesystem in the file, but \n"
357" that's not required. Swap devices containing swap files can be\n"
358" specified like this. Also, a file which doesn't contain a\n"
359" filesystem can have its contents read in the virtual \n"
360" machine by running 'dd' on the device. <n> must be in the range\n"
361" 0 to 7. Appending an 'r' to the number will cause that device\n"
362" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
363" an 's' will cause data to be written to disk on the host immediately.\n"
364" 'c' will cause the device to be treated as being shared between multiple\n"
365" UMLs and file locking will be turned off - this is appropriate for a\n"
366" cluster filesystem and inappropriate at almost all other times.\n\n"
50109b5a 367" 't' will disable trim/discard support on the device (enabled by default).\n\n"
ef3ba87c
CO
368" An optional device serial number can be exposed using the serial parameter\n"
369" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
370" useful when a unique number should be given to the device. Note when\n"
371" specifying a label, the filename2 must be also presented. It can be\n"
372" an empty string, in which case the backing file is not used:\n"
373" ubd0=File,,Serial\n"
1da177e4
LT
374);
375
8299ca5c 376static int udb_setup(char *str)
1da177e4
LT
377{
378 printk("udb%s specified on command line is almost certainly a ubd -> "
379 "udb TYPO\n", str);
dc764e50 380 return 1;
1da177e4
LT
381}
382
383__setup("udb", udb_setup);
384__uml_help(udb_setup,
385"udb\n"
0894e27e
JD
386" This option is here solely to catch ubd -> udb typos, which can be\n"
387" to impossible to catch visually unless you specifically look for\n"
388" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
389" in the boot output.\n\n"
390);
391
/* Only changed by ubd_driver_init, which is an initcall. */
5dc62b1b 393static int thread_fd = -1;
a0044bdf 394
f88f0bdf
AI
395/* Function to read several request pointers at a time
396* handling fractional reads if (and as) needed
397*/
398
399static int bulk_req_safe_read(
400 int fd,
401 struct io_thread_req * (*request_buffer)[],
402 struct io_thread_req **remainder,
403 int *remainder_size,
404 int max_recs
405 )
406{
407 int n = 0;
408 int res = 0;
409
410 if (*remainder_size > 0) {
411 memmove(
412 (char *) request_buffer,
413 (char *) remainder, *remainder_size
414 );
415 n = *remainder_size;
416 }
417
418 res = os_read_file(
419 fd,
420 ((char *) request_buffer) + *remainder_size,
421 sizeof(struct io_thread_req *)*max_recs
422 - *remainder_size
423 );
424 if (res > 0) {
425 n += res;
426 if ((n % sizeof(struct io_thread_req *)) > 0) {
427 /*
428 * Read somehow returned not a multiple of dword
429 * theoretically possible, but never observed in the
430 * wild, so read routine must be able to handle it
431 */
432 *remainder_size = n % sizeof(struct io_thread_req *);
433 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
434 memmove(
435 remainder,
436 ((char *) request_buffer) +
437 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
438 *remainder_size
439 );
440 n = n - *remainder_size;
441 }
442 } else {
443 n = res;
444 }
445 return n;
446}
447
5db755fb 448static void ubd_end_request(struct io_thread_req *io_req)
1da177e4 449{
31ade7d4
CH
450 if (io_req->error == BLK_STS_NOTSUPP) {
451 if (req_op(io_req->req) == REQ_OP_DISCARD)
73e3715e 452 blk_queue_disable_discard(io_req->req->q);
31ade7d4 453 else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
73e3715e 454 blk_queue_disable_write_zeroes(io_req->req->q);
a0044bdf 455 }
5db755fb
CH
456 blk_mq_end_request(io_req->req, io_req->error);
457 kfree(io_req);
1da177e4
LT
458}
459
7bea96fd 460static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 461{
5db755fb
CH
462 int len, i;
463
464 while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
465 &irq_remainder, &irq_remainder_size,
466 UBD_REQ_BUFFER_SIZE)) >= 0) {
467 for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
468 ubd_end_request((*irq_req_buffer)[i]);
469 }
470
471 if (len < 0 && len != -EAGAIN)
472 pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
dc764e50 473 return IRQ_HANDLED;
91acb21f 474}
09ace81c 475
/* Set by start_io_thread() in ubd_driver_init, which is an initcall. */
static struct os_helper_thread *io_td;

/* Exit hook: stop the I/O helper thread if one was started. */
static void kill_io_thread(void)
{
	if (!io_td)
		return;

	os_kill_helper_thread(io_td);
}

__uml_exitcall(kill_io_thread);
486
d8d7c28e 487static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
488{
489 char *file;
85356398
RW
490 int fd;
491 int err;
492
493 __u32 version;
494 __u32 align;
495 char *backing_file;
853bc0ab 496 time64_t mtime;
85356398
RW
497 unsigned long long size;
498 int sector_size;
499 int bitmap_offset;
500
501 if (ubd_dev->file && ubd_dev->cow.file) {
502 file = ubd_dev->cow.file;
503
504 goto out;
505 }
506
d4afcba9 507 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
508 if (fd < 0)
509 return fd;
510
511 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
512 &mtime, &size, &sector_size, &align, &bitmap_offset);
513 os_close_file(fd);
1da177e4 514
85356398
RW
515 if(err == -EINVAL)
516 file = ubd_dev->file;
517 else
518 file = backing_file;
519
520out:
dc764e50 521 return os_file_size(file, size_out);
1da177e4
LT
522}
523
5dc62b1b
WC
/*
 * Read @len bytes of COW bitmap from @fd at file offset @offset into
 * @buf.  Returns 0 on success or the negative os_pread_file() result.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int n = os_pread_file(fd, buf, len, offset);

	return n < 0 ? n : 0;
}
534
853bc0ab 535static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
5dc62b1b 536{
853bc0ab 537 time64_t modtime;
5dc62b1b
WC
538 unsigned long long actual;
539 int err;
540
541 err = os_file_modtime(file, &modtime);
542 if (err < 0) {
543 printk(KERN_ERR "Failed to get modification time of backing "
544 "file \"%s\", err = %d\n", file, -err);
545 return err;
546 }
547
548 err = os_file_size(file, &actual);
549 if (err < 0) {
550 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
551 "err = %d\n", file, -err);
552 return err;
553 }
554
555 if (actual != size) {
556 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
557 * the typecast.*/
558 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
559 "vs backing file\n", (unsigned long long) size, actual);
560 return -EINVAL;
561 }
562 if (modtime != mtime) {
853bc0ab 563 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
5dc62b1b
WC
564 "backing file\n", mtime, modtime);
565 return -EINVAL;
566 }
567 return 0;
568}
569
570static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
571{
572 struct uml_stat buf1, buf2;
573 int err;
574
575 if (from_cmdline == NULL)
576 return 0;
577 if (!strcmp(from_cmdline, from_cow))
578 return 0;
579
580 err = os_stat_file(from_cmdline, &buf1);
581 if (err < 0) {
582 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
583 -err);
584 return 0;
585 }
586 err = os_stat_file(from_cow, &buf2);
587 if (err < 0) {
588 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
589 -err);
590 return 1;
591 }
592 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
593 return 0;
594
595 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
596 "\"%s\" specified in COW header of \"%s\"\n",
597 from_cmdline, from_cow, cow);
598 return 1;
599}
600
601static int open_ubd_file(char *file, struct openflags *openflags, int shared,
602 char **backing_file_out, int *bitmap_offset_out,
603 unsigned long *bitmap_len_out, int *data_offset_out,
604 int *create_cow_out)
605{
853bc0ab 606 time64_t mtime;
5dc62b1b
WC
607 unsigned long long size;
608 __u32 version, align;
609 char *backing_file;
610 int fd, err, sectorsize, asked_switch, mode = 0644;
611
612 fd = os_open_file(file, *openflags, mode);
613 if (fd < 0) {
614 if ((fd == -ENOENT) && (create_cow_out != NULL))
615 *create_cow_out = 1;
616 if (!openflags->w ||
617 ((fd != -EROFS) && (fd != -EACCES)))
618 return fd;
619 openflags->w = 0;
620 fd = os_open_file(file, *openflags, mode);
621 if (fd < 0)
622 return fd;
623 }
624
625 if (shared)
626 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
627 else {
628 err = os_lock_file(fd, openflags->w);
629 if (err < 0) {
630 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
631 file, -err);
632 goto out_close;
633 }
634 }
635
636 /* Successful return case! */
637 if (backing_file_out == NULL)
638 return fd;
639
640 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
641 &size, &sectorsize, &align, bitmap_offset_out);
642 if (err && (*backing_file_out != NULL)) {
643 printk(KERN_ERR "Failed to read COW header from COW file "
644 "\"%s\", errno = %d\n", file, -err);
645 goto out_close;
646 }
647 if (err)
648 return fd;
649
650 asked_switch = path_requires_switch(*backing_file_out, backing_file,
651 file);
652
653 /* Allow switching only if no mismatch. */
654 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
655 mtime)) {
656 printk(KERN_ERR "Switching backing file to '%s'\n",
657 *backing_file_out);
658 err = write_cow_header(file, fd, *backing_file_out,
659 sectorsize, align, &size);
660 if (err) {
661 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
662 goto out_close;
663 }
664 } else {
665 *backing_file_out = backing_file;
666 err = backing_file_mismatch(*backing_file_out, size, mtime);
667 if (err)
668 goto out_close;
669 }
670
671 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
672 bitmap_len_out, data_offset_out);
673
674 return fd;
675 out_close:
676 os_close_file(fd);
677 return err;
678}
679
680static int create_cow_file(char *cow_file, char *backing_file,
681 struct openflags flags,
682 int sectorsize, int alignment, int *bitmap_offset_out,
683 unsigned long *bitmap_len_out, int *data_offset_out)
684{
685 int err, fd;
686
687 flags.c = 1;
688 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
689 if (fd < 0) {
690 err = fd;
691 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
692 cow_file, -err);
693 goto out;
694 }
695
696 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
697 bitmap_offset_out, bitmap_len_out,
698 data_offset_out);
699 if (!err)
700 return fd;
701 os_close_file(fd);
702 out:
703 return err;
704}
705
5f75a4f8 706static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 707{
7d314e34
PBG
708 os_close_file(ubd_dev->fd);
709 if(ubd_dev->cow.file == NULL)
1da177e4
LT
710 return;
711
7d314e34
PBG
712 os_close_file(ubd_dev->cow.fd);
713 vfree(ubd_dev->cow.bitmap);
714 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
715}
716
7d314e34 717static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
718{
719 struct openflags flags;
720 char **back_ptr;
721 int err, create_cow, *create_ptr;
0bf16bff 722 int fd;
1da177e4 723
7d314e34 724 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 725 create_cow = 0;
7d314e34
PBG
726 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
727 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
728
729 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
730 back_ptr, &ubd_dev->cow.bitmap_offset,
731 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 732 create_ptr);
1da177e4 733
0bf16bff
PBG
734 if((fd == -ENOENT) && create_cow){
735 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
a43c8316 736 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
7d314e34
PBG
737 &ubd_dev->cow.bitmap_offset,
738 &ubd_dev->cow.bitmap_len,
739 &ubd_dev->cow.data_offset);
0bf16bff 740 if(fd >= 0){
1da177e4 741 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 742 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
743 }
744 }
745
0bf16bff 746 if(fd < 0){
7d314e34 747 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
748 -fd);
749 return fd;
1da177e4 750 }
0bf16bff 751 ubd_dev->fd = fd;
1da177e4 752
7d314e34 753 if(ubd_dev->cow.file != NULL){
1da177e4 754 err = -ENOMEM;
da2486ba 755 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 756 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
757 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
758 goto error;
759 }
1da177e4 760
7d314e34
PBG
761 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
762 ubd_dev->cow.bitmap_offset,
763 ubd_dev->cow.bitmap_len);
1da177e4
LT
764 if(err < 0)
765 goto error;
766
7d314e34 767 flags = ubd_dev->openflags;
1da177e4 768 flags.w = 0;
7d314e34 769 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 770 NULL, NULL, NULL, NULL);
1da177e4 771 if(err < 0) goto error;
7d314e34 772 ubd_dev->cow.fd = err;
1da177e4 773 }
dc764e50 774 return 0;
1da177e4 775 error:
7d314e34 776 os_close_file(ubd_dev->fd);
dc764e50 777 return err;
1da177e4
LT
778}
779
2e3f5251
JD
780static void ubd_device_release(struct device *dev)
781{
5bee35e5 782 struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev);
2e3f5251 783
4e6da0fe 784 blk_mq_free_tag_set(&ubd_dev->tag_set);
2e3f5251
JD
785 *ubd_dev = ((struct ubd) DEFAULT_UBD);
786}
787
ef3ba87c
CO
788static ssize_t serial_show(struct device *dev,
789 struct device_attribute *attr, char *buf)
790{
791 struct gendisk *disk = dev_to_disk(dev);
792 struct ubd *ubd_dev = disk->private_data;
793
794 if (!ubd_dev)
795 return 0;
796
797 return sprintf(buf, "%s", ubd_dev->serial);
798}
799
/* Read-only "serial" attribute backed by serial_show(). */
static DEVICE_ATTR_RO(serial);

/* Attributes attached to every ubd disk; NULL-terminated. */
static struct attribute *ubd_attrs[] = {
	&dev_attr_serial.attr,
	NULL,
};

/* Every attribute is exposed with the mode it was declared with. */
static umode_t ubd_attrs_are_visible(struct kobject *kobj,
				     struct attribute *a, int n)
{
	return a->mode;
}

static const struct attribute_group ubd_attr_group = {
	.attrs = ubd_attrs,
	.is_visible = ubd_attrs_are_visible,
};

/* Group list handed to device_add_disk() in ubd_add(); NULL-terminated. */
static const struct attribute_group *ubd_attr_groups[] = {
	&ubd_attr_group,
	NULL,
};
822
/*
 * Round n up to the next multiple of SECTOR_SIZE.  The mask arithmetic
 * relies on SECTOR_SIZE being a power of two.  The argument is
 * parenthesised so that any expression can be passed safely.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
1da177e4 824
4e6da0fe
RW
825static const struct blk_mq_ops ubd_mq_ops = {
826 .queue_rq = ubd_queue_rq,
4e6da0fe
RW
827};
828
f28169d2 829static int ubd_add(int n, char **error_out)
1da177e4 830{
7d314e34 831 struct ubd *ubd_dev = &ubd_devs[n];
5d6789ce
CH
832 struct queue_limits lim = {
833 .max_segments = MAX_SG,
834 .seg_boundary_mask = PAGE_SIZE - 1,
1122c0c1 835 .features = BLK_FEAT_WRITE_CACHE,
5d6789ce 836 };
35efb594 837 struct gendisk *disk;
f28169d2 838 int err = 0;
1da177e4 839
7d314e34 840 if(ubd_dev->file == NULL)
ec7cf783 841 goto out;
1da177e4 842
58ebe3e7
CH
843 if (ubd_dev->cow.file)
844 lim.max_hw_sectors = 8 * sizeof(long);
845 if (!ubd_dev->no_trim) {
846 lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
847 lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
848 }
849
7d314e34 850 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
851 if(err < 0){
852 *error_out = "Couldn't determine size of device's file";
80c13749 853 goto out;
f28169d2 854 }
1da177e4 855
fb5d1d38
CH
856 err = ubd_open_dev(ubd_dev);
857 if (err) {
858 pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
859 'a' + n, ubd_dev->file, -err);
860 goto out;
861 }
862
7d314e34 863 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 864
4e6da0fe
RW
865 ubd_dev->tag_set.ops = &ubd_mq_ops;
866 ubd_dev->tag_set.queue_depth = 64;
867 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
4e6da0fe
RW
868 ubd_dev->tag_set.driver_data = ubd_dev;
869 ubd_dev->tag_set.nr_hw_queues = 1;
a0044bdf 870
4e6da0fe
RW
871 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
872 if (err)
fb5d1d38 873 goto out_close;
4e6da0fe 874
5d6789ce 875 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
35efb594
CH
876 if (IS_ERR(disk)) {
877 err = PTR_ERR(disk);
aea05eb5 878 goto out_cleanup_tags;
62f96cb0 879 }
4e6da0fe 880
0267e9ca
CH
881 disk->major = UBD_MAJOR;
882 disk->first_minor = n << UBD_SHIFT;
883 disk->minors = 1 << UBD_SHIFT;
884 disk->fops = &ubd_blops;
885 set_capacity(disk, ubd_dev->size / 512);
886 sprintf(disk->disk_name, "ubd%c", 'a' + n);
887 disk->private_data = ubd_dev;
5e4e1ff8 888 set_disk_ro(disk, !ubd_dev->openflags.w);
0267e9ca
CH
889
890 ubd_dev->pdev.id = n;
891 ubd_dev->pdev.name = DRIVER_NAME;
892 ubd_dev->pdev.dev.release = ubd_device_release;
893 dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
894 platform_device_register(&ubd_dev->pdev);
895
896 err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
66638f16
LC
897 if (err)
898 goto out_cleanup_disk;
899
df700802
TB
900 ubd_dev->disk = disk;
901
35efb594 902 return 0;
62f96cb0 903
66638f16 904out_cleanup_disk:
8b9ab626 905 put_disk(disk);
4e6da0fe
RW
906out_cleanup_tags:
907 blk_mq_free_tag_set(&ubd_dev->tag_set);
fb5d1d38
CH
908out_close:
909 ubd_close_dev(ubd_dev);
35efb594
CH
910out:
911 return err;
1da177e4
LT
912}
913
f28169d2 914static int ubd_config(char *str, char **error_out)
1da177e4 915{
e7f6552f 916 int n, ret;
1da177e4 917
f28169d2
JD
918 /* This string is possibly broken up and stored, so it's only
919 * freed if ubd_setup_common fails, or if only general options
920 * were set.
921 */
970d6e3a 922 str = kstrdup(str, GFP_KERNEL);
e7f6552f 923 if (str == NULL) {
f28169d2
JD
924 *error_out = "Failed to allocate memory";
925 return -ENOMEM;
1da177e4 926 }
f28169d2
JD
927
928 ret = ubd_setup_common(str, &n, error_out);
929 if (ret)
e7f6552f 930 goto err_free;
f28169d2 931
e7f6552f
PBG
932 if (n == -1) {
933 ret = 0;
d8d7c28e 934 goto err_free;
1da177e4 935 }
1da177e4 936
dc764e50 937 mutex_lock(&ubd_lock);
f28169d2 938 ret = ubd_add(n, error_out);
e7f6552f 939 if (ret)
7d314e34 940 ubd_devs[n].file = NULL;
dc764e50 941 mutex_unlock(&ubd_lock);
1da177e4 942
e7f6552f 943out:
dc764e50 944 return ret;
e7f6552f
PBG
945
946err_free:
947 kfree(str);
948 goto out;
1da177e4
LT
949}
950
951static int ubd_get_config(char *name, char *str, int size, char **error_out)
952{
7d314e34 953 struct ubd *ubd_dev;
1da177e4
LT
954 int n, len = 0;
955
956 n = parse_unit(&name);
957 if((n >= MAX_DEV) || (n < 0)){
958 *error_out = "ubd_get_config : device number out of range";
dc764e50 959 return -1;
1da177e4
LT
960 }
961
7d314e34 962 ubd_dev = &ubd_devs[n];
d7fb2c38 963 mutex_lock(&ubd_lock);
1da177e4 964
7d314e34 965 if(ubd_dev->file == NULL){
1da177e4
LT
966 CONFIG_CHUNK(str, size, len, "", 1);
967 goto out;
968 }
969
7d314e34 970 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 971
7d314e34 972 if(ubd_dev->cow.file != NULL){
1da177e4 973 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 974 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
975 }
976 else CONFIG_CHUNK(str, size, len, "", 1);
977
978 out:
d7fb2c38 979 mutex_unlock(&ubd_lock);
dc764e50 980 return len;
1da177e4
LT
981}
982
29d56cfe
JD
983static int ubd_id(char **str, int *start_out, int *end_out)
984{
dc764e50 985 int n;
29d56cfe
JD
986
987 n = parse_unit(str);
dc764e50
JD
988 *start_out = 0;
989 *end_out = MAX_DEV - 1;
990 return n;
29d56cfe
JD
991}
992
f28169d2 993static int ubd_remove(int n, char **error_out)
1da177e4 994{
7d314e34 995 struct ubd *ubd_dev;
29d56cfe 996 int err = -ENODEV;
1da177e4 997
d7fb2c38 998 mutex_lock(&ubd_lock);
1da177e4 999
7d314e34 1000 ubd_dev = &ubd_devs[n];
1da177e4 1001
7d314e34 1002 if(ubd_dev->file == NULL)
29d56cfe 1003 goto out;
1da177e4 1004
32621ad7 1005 if (ubd_dev->disk) {
fb5d1d38
CH
1006 /* you cannot remove a open disk */
1007 err = -EBUSY;
1008 if (disk_openers(ubd_dev->disk))
1009 goto out;
1010
32621ad7 1011 del_gendisk(ubd_dev->disk);
fb5d1d38 1012 ubd_close_dev(ubd_dev);
32621ad7 1013 put_disk(ubd_dev->disk);
1da177e4
LT
1014 }
1015
1da177e4 1016 err = 0;
2e3f5251 1017 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1018out:
d7fb2c38 1019 mutex_unlock(&ubd_lock);
29d56cfe 1020 return err;
1da177e4
LT
1021}
1022
f28169d2 1023/* All these are called by mconsole in process context and without
b8831a1d 1024 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1025 */
1da177e4 1026static struct mc_device ubd_mc = {
84f48d4f 1027 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1028 .name = "ubd",
1029 .config = ubd_config,
dc764e50 1030 .get_config = ubd_get_config,
29d56cfe 1031 .id = ubd_id,
1da177e4
LT
1032 .remove = ubd_remove,
1033};
1034
d8d7c28e 1035static int __init ubd_mc_init(void)
1da177e4
LT
1036{
1037 mconsole_register_dev(&ubd_mc);
1038 return 0;
1039}
1040
1041__initcall(ubd_mc_init);
1042
d8d7c28e
PBG
1043static int __init ubd0_init(void)
1044{
1045 struct ubd *ubd_dev = &ubd_devs[0];
1046
b8831a1d 1047 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1048 if(ubd_dev->file == NULL)
1049 ubd_dev->file = "root_fs";
b8831a1d
JD
1050 mutex_unlock(&ubd_lock);
1051
dc764e50 1052 return 0;
d8d7c28e
PBG
1053}
1054
1055__initcall(ubd0_init);
1056
b8831a1d 1057/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1058static struct platform_driver ubd_driver = {
1059 .driver = {
1060 .name = DRIVER_NAME,
1061 },
1da177e4
LT
1062};
1063
d8d7c28e 1064static int __init ubd_init(void)
1da177e4 1065{
f28169d2
JD
1066 char *error;
1067 int i, err;
1da177e4 1068
792dd4fc 1069 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1070 return -1;
1071
6da2ec56
KC
1072 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1073 sizeof(struct io_thread_req *),
1074 GFP_KERNEL
f88f0bdf
AI
1075 );
1076 irq_remainder = 0;
1077
1078 if (irq_req_buffer == NULL) {
1079 printk(KERN_ERR "Failed to initialize ubd buffering\n");
31a5990e 1080 return -ENOMEM;
f88f0bdf 1081 }
6da2ec56
KC
1082 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1083 sizeof(struct io_thread_req *),
1084 GFP_KERNEL
f88f0bdf
AI
1085 );
1086
1087 io_remainder = 0;
1088
1089 if (io_req_buffer == NULL) {
1090 printk(KERN_ERR "Failed to initialize ubd buffering\n");
31a5990e 1091 return -ENOMEM;
f88f0bdf 1092 }
3ae5eaec 1093 platform_driver_register(&ubd_driver);
dc764e50 1094 mutex_lock(&ubd_lock);
f28169d2
JD
1095 for (i = 0; i < MAX_DEV; i++){
1096 err = ubd_add(i, &error);
1097 if(err)
1098 printk(KERN_ERR "Failed to initialize ubd device %d :"
1099 "%s\n", i, error);
1100 }
dc764e50 1101 mutex_unlock(&ubd_lock);
1da177e4
LT
1102 return 0;
1103}
1104
1105late_initcall(ubd_init);
1106
d7f89a9d
TB
1107static int __init ubd_driver_init(void)
1108{
91acb21f
JD
1109 int err;
1110
1111 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1112 if(global_openflags.s){
1113 printk(KERN_INFO "ubd: Synchronous mode\n");
1114 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1115 * enough. So use anyway the io thread. */
1116 }
d7f89a9d
TB
1117 err = start_io_thread(&io_td, &thread_fd);
1118 if (err < 0) {
6c29256c 1119 printk(KERN_ERR
91acb21f 1120 "ubd : Failed to start I/O thread (errno = %d) - "
d7f89a9d 1121 "falling back to synchronous I/O\n", -err);
dc764e50 1122 return 0;
91acb21f 1123 }
6c29256c 1124 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1125 0, "ubd", ubd_devs);
36d46a59 1126 if(err < 0)
91acb21f 1127 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1128 return 0;
91acb21f
JD
1129}
1130
1131device_initcall(ubd_driver_init);
1132
91acb21f
JD
1133static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1134 __u64 *cow_offset, unsigned long *bitmap,
1135 __u64 bitmap_offset, unsigned long *bitmap_words,
1136 __u64 bitmap_len)
1da177e4 1137{
a43c8316 1138 __u64 sector = io_offset >> SECTOR_SHIFT;
91acb21f
JD
1139 int i, update_bitmap = 0;
1140
a43c8316 1141 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
91acb21f
JD
1142 if(cow_mask != NULL)
1143 ubd_set_bit(i, (unsigned char *) cow_mask);
1144 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1145 continue;
1da177e4 1146
91acb21f
JD
1147 update_bitmap = 1;
1148 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1149 }
1150
1151 if(!update_bitmap)
1152 return;
1da177e4 1153
91acb21f 1154 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1155
91acb21f
JD
1156 /* This takes care of the case where we're exactly at the end of the
1157 * device, and *cow_offset + 1 is off the end. So, just back it up
1158 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1159 * for the original diagnosis.
1160 */
6d074242
JO
1161 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1162 sizeof(unsigned long)) - 1))
91acb21f
JD
1163 (*cow_offset)--;
1164
1165 bitmap_words[0] = bitmap[*cow_offset];
1166 bitmap_words[1] = bitmap[*cow_offset + 1];
1167
1168 *cow_offset *= sizeof(unsigned long);
1169 *cow_offset += bitmap_offset;
1170}
1171
fc6b6a87
GKB
1172static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1173 unsigned long offset, unsigned long *bitmap,
91acb21f
JD
1174 __u64 bitmap_offset, __u64 bitmap_len)
1175{
fc6b6a87 1176 __u64 sector = offset >> SECTOR_SHIFT;
91acb21f
JD
1177 int i;
1178
fc6b6a87 1179 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
91acb21f
JD
1180 panic("Operation too long");
1181
a43c8316 1182 if (req_op(req->req) == REQ_OP_READ) {
fc6b6a87 1183 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
91acb21f 1184 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1185 ubd_set_bit(i, (unsigned char *)
fc6b6a87
GKB
1186 &segment->sector_mask);
1187 }
1188 } else {
1189 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1190 &segment->cow_offset, bitmap, bitmap_offset,
1191 segment->bitmap_words, bitmap_len);
1192 }
1193}
1194
1195static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1196 struct request *req)
1197{
1198 struct bio_vec bvec;
1199 struct req_iterator iter;
1200 int i = 0;
1201 unsigned long byte_offset = io_req->offset;
7ee1de6e 1202 enum req_op op = req_op(req);
fc6b6a87
GKB
1203
1204 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1205 io_req->io_desc[0].buffer = NULL;
1206 io_req->io_desc[0].length = blk_rq_bytes(req);
1207 } else {
1208 rq_for_each_segment(bvec, req, iter) {
1209 BUG_ON(i >= io_req->desc_cnt);
1210
25d84545 1211 io_req->io_desc[i].buffer = bvec_virt(&bvec);
fc6b6a87
GKB
1212 io_req->io_desc[i].length = bvec.bv_len;
1213 i++;
1214 }
1215 }
1216
1217 if (dev->cow.file) {
1218 for (i = 0; i < io_req->desc_cnt; i++) {
1219 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1220 dev->cow.bitmap, dev->cow.bitmap_offset,
1221 dev->cow.bitmap_len);
1222 byte_offset += io_req->io_desc[i].length;
dc764e50 1223 }
fc6b6a87 1224
91acb21f 1225 }
1da177e4
LT
1226}
1227
fc6b6a87
GKB
1228static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1229 int desc_cnt)
1da177e4 1230{
ecb0a83e 1231 struct io_thread_req *io_req;
fc6b6a87 1232 int i;
1da177e4 1233
fc6b6a87
GKB
1234 io_req = kmalloc(sizeof(*io_req) +
1235 (desc_cnt * sizeof(struct io_desc)),
1236 GFP_ATOMIC);
ecb0a83e 1237 if (!io_req)
fc6b6a87 1238 return NULL;
805f11a0
RW
1239
1240 io_req->req = req;
ecb0a83e
CH
1241 if (dev->cow.file)
1242 io_req->fds[0] = dev->cow.fd;
1243 else
1244 io_req->fds[0] = dev->fd;
0033dfd9 1245 io_req->error = 0;
53766def
AI
1246 io_req->sectorsize = SECTOR_SIZE;
1247 io_req->fds[1] = dev->fd;
fc6b6a87 1248 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
53766def
AI
1249 io_req->offsets[0] = 0;
1250 io_req->offsets[1] = dev->cow.data_offset;
1251
fc6b6a87
GKB
1252 for (i = 0 ; i < desc_cnt; i++) {
1253 io_req->io_desc[i].sector_mask = 0;
1254 io_req->io_desc[i].cow_offset = -1;
1255 }
1256
1257 return io_req;
1258}
1259
1260static int ubd_submit_request(struct ubd *dev, struct request *req)
1261{
1262 int segs = 0;
1263 struct io_thread_req *io_req;
1264 int ret;
7ee1de6e 1265 enum req_op op = req_op(req);
fc6b6a87
GKB
1266
1267 if (op == REQ_OP_FLUSH)
1268 segs = 0;
1269 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1270 segs = 1;
1271 else
1272 segs = blk_rq_nr_phys_segments(req);
1273
1274 io_req = ubd_alloc_req(dev, req, segs);
1275 if (!io_req)
1276 return -ENOMEM;
1277
1278 io_req->desc_cnt = segs;
1279 if (segs)
1280 ubd_map_req(dev, io_req, req);
53766def 1281
ecb0a83e
CH
1282 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1283 if (ret != sizeof(io_req)) {
1284 if (ret != -EAGAIN)
1285 pr_err("write to io thread failed: %d\n", -ret);
bc1d72e7 1286 kfree(io_req);
bc1d72e7 1287 }
ecb0a83e 1288 return ret;
bc1d72e7
RW
1289}
1290
4e6da0fe
RW
1291static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1292 const struct blk_mq_queue_data *bd)
1da177e4 1293{
6961cd4d 1294 struct ubd *ubd_dev = hctx->queue->queuedata;
4e6da0fe 1295 struct request *req = bd->rq;
53766def 1296 int ret = 0, res = BLK_STS_OK;
a0044bdf 1297
4e6da0fe
RW
1298 blk_mq_start_request(req);
1299
6961cd4d
JA
1300 spin_lock_irq(&ubd_dev->lock);
1301
53766def 1302 switch (req_op(req)) {
53766def 1303 case REQ_OP_FLUSH:
53766def
AI
1304 case REQ_OP_READ:
1305 case REQ_OP_WRITE:
50109b5a
AI
1306 case REQ_OP_DISCARD:
1307 case REQ_OP_WRITE_ZEROES:
fc6b6a87 1308 ret = ubd_submit_request(ubd_dev, req);
50109b5a 1309 break;
53766def
AI
1310 default:
1311 WARN_ON_ONCE(1);
1312 res = BLK_STS_NOTSUPP;
4e6da0fe 1313 }
53766def 1314
6961cd4d
JA
1315 spin_unlock_irq(&ubd_dev->lock);
1316
d848074b
AI
1317 if (ret < 0) {
1318 if (ret == -ENOMEM)
1319 res = BLK_STS_RESOURCE;
1320 else
1321 res = BLK_STS_DEV_RESOURCE;
1322 }
6961cd4d 1323
53766def 1324 return res;
1da177e4
LT
1325}
1326
a885c8c4
CH
1327static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1328{
7d314e34 1329 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1330
1331 geo->heads = 128;
1332 geo->sectors = 32;
7d314e34 1333 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1334 return 0;
1335}
1336
05bdb996 1337static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1da177e4
LT
1338 unsigned int cmd, unsigned long arg)
1339{
a625c998 1340 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1341 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1342
1343 switch (cmd) {
1da177e4 1344 struct cdrom_volctrl volume;
1da177e4 1345 case HDIO_GET_IDENTITY:
73855e13
BZ
1346 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1347 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1348 ubd_id[ATA_ID_HEADS] = 128;
1349 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1350 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1351 sizeof(ubd_id)))
dc764e50
JD
1352 return -EFAULT;
1353 return 0;
b8831a1d 1354
1da177e4
LT
1355 case CDROMVOLREAD:
1356 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1357 return -EFAULT;
1da177e4
LT
1358 volume.channel0 = 255;
1359 volume.channel1 = 255;
1360 volume.channel2 = 255;
1361 volume.channel3 = 255;
1362 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1363 return -EFAULT;
1364 return 0;
1da177e4 1365 }
dc764e50 1366 return -EINVAL;
1da177e4
LT
1367}
1368
a43c8316
AI
1369static int map_error(int error_code)
1370{
1371 switch (error_code) {
1372 case 0:
1373 return BLK_STS_OK;
1374 case ENOSYS:
1375 case EOPNOTSUPP:
1376 return BLK_STS_NOTSUPP;
1377 case ENOSPC:
1378 return BLK_STS_NOSPC;
1379 }
1380 return BLK_STS_IOERR;
1381}
1382
a41421ed
AI
1383/*
1384 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1385 *
1386 * The following functions are part of UML hypervisor code.
1387 * All functions from here onwards are executed as a helper
1388 * thread and are not allowed to execute any kernel functions.
1389 *
1390 * Any communication must occur strictly via shared memory and IPC.
1391 *
1392 * Do not add printks, locks, kernel memory operations, etc - it
1393 * will result in unpredictable behaviour and/or crashes.
1394 */
1395
fc6b6a87 1396static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1da177e4 1397{
91acb21f 1398 int n;
1da177e4 1399
fc6b6a87 1400 if (segment->cow_offset == -1)
a43c8316 1401 return map_error(0);
1da177e4 1402
fc6b6a87
GKB
1403 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1404 sizeof(segment->bitmap_words), segment->cow_offset);
1405 if (n != sizeof(segment->bitmap_words))
a43c8316 1406 return map_error(-n);
1da177e4 1407
a43c8316 1408 return map_error(0);
91acb21f 1409}
1da177e4 1410
fc6b6a87 1411static void do_io(struct io_thread_req *req, struct io_desc *desc)
91acb21f 1412{
50109b5a 1413 char *buf = NULL;
91acb21f
JD
1414 unsigned long len;
1415 int n, nsectors, start, end, bit;
91acb21f
JD
1416 __u64 off;
1417
50109b5a
AI
1418 /* FLUSH is really a special case, we cannot "case" it with others */
1419
a43c8316 1420 if (req_op(req->req) == REQ_OP_FLUSH) {
805f11a0 1421 /* fds[0] is always either the rw image or our cow file */
a41421ed 1422 req->error = map_error(-os_sync_file(req->fds[0]));
805f11a0
RW
1423 return;
1424 }
1425
fc6b6a87 1426 nsectors = desc->length / req->sectorsize;
91acb21f
JD
1427 start = 0;
1428 do {
fc6b6a87 1429 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
91acb21f
JD
1430 end = start;
1431 while((end < nsectors) &&
fc6b6a87 1432 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
91acb21f
JD
1433 end++;
1434
1435 off = req->offset + req->offsets[bit] +
1436 start * req->sectorsize;
1437 len = (end - start) * req->sectorsize;
fc6b6a87
GKB
1438 if (desc->buffer != NULL)
1439 buf = &desc->buffer[start * req->sectorsize];
91acb21f 1440
50109b5a
AI
1441 switch (req_op(req->req)) {
1442 case REQ_OP_READ:
91acb21f
JD
1443 n = 0;
1444 do {
1445 buf = &buf[n];
1446 len -= n;
8c6157b6 1447 n = os_pread_file(req->fds[bit], buf, len, off);
50109b5a 1448 if (n < 0) {
a43c8316 1449 req->error = map_error(-n);
91acb21f
JD
1450 return;
1451 }
1452 } while((n < len) && (n != 0));
1453 if (n < len) memset(&buf[n], 0, len - n);
50109b5a
AI
1454 break;
1455 case REQ_OP_WRITE:
8c6157b6 1456 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f 1457 if(n != len){
a43c8316 1458 req->error = map_error(-n);
91acb21f
JD
1459 return;
1460 }
50109b5a
AI
1461 break;
1462 case REQ_OP_DISCARD:
50109b5a
AI
1463 n = os_falloc_punch(req->fds[bit], off, len);
1464 if (n) {
1465 req->error = map_error(-n);
1466 return;
1467 }
1468 break;
d2a0a616
FD
1469 case REQ_OP_WRITE_ZEROES:
1470 n = os_falloc_zeroes(req->fds[bit], off, len);
1471 if (n) {
1472 req->error = map_error(-n);
1473 return;
1474 }
1475 break;
50109b5a
AI
1476 default:
1477 WARN_ON_ONCE(1);
1478 req->error = BLK_STS_NOTSUPP;
1479 return;
91acb21f
JD
1480 }
1481
1482 start = end;
1483 } while(start < nsectors);
1da177e4 1484
fc6b6a87
GKB
1485 req->offset += len;
1486 req->error = update_bitmap(req, desc);
1da177e4 1487}
91acb21f
JD
1488
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
1492int kernel_fd = -1;
1493
d8d7c28e 1494/* Only changed by the io thread. XXX: currently unused. */
4dc5a328 1495static int io_count;
91acb21f 1496
d7f89a9d 1497void *io_thread(void *arg)
91acb21f 1498{
f88f0bdf 1499 int n, count, written, res;
91acb21f 1500
d7f89a9d 1501 os_fix_helper_thread_signals();
91d44ff8 1502
91acb21f 1503 while(1){
f88f0bdf
AI
1504 n = bulk_req_safe_read(
1505 kernel_fd,
1506 io_req_buffer,
1507 &io_remainder,
1508 &io_remainder_size,
1509 UBD_REQ_BUFFER_SIZE
1510 );
e355b2f5
GKB
1511 if (n <= 0) {
1512 if (n == -EAGAIN)
f88f0bdf 1513 ubd_read_poll(-1);
e355b2f5
GKB
1514
1515 continue;
91acb21f 1516 }
f88f0bdf
AI
1517
1518 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
fc6b6a87
GKB
1519 struct io_thread_req *req = (*io_req_buffer)[count];
1520 int i;
1521
f88f0bdf 1522 io_count++;
fc6b6a87
GKB
1523 for (i = 0; !req->error && i < req->desc_cnt; i++)
1524 do_io(req, &(req->io_desc[i]));
1525
f88f0bdf
AI
1526 }
1527
1528 written = 0;
1529
1530 do {
6e682d53
GKB
1531 res = os_write_file(kernel_fd,
1532 ((char *) io_req_buffer) + written,
1533 n - written);
ff6a1798 1534 if (res >= 0) {
f88f0bdf 1535 written += res;
f88f0bdf
AI
1536 }
1537 if (written < n) {
1538 ubd_write_poll(-1);
1539 }
1540 } while (written < n);
91acb21f 1541 }
91acb21f 1542
d7f89a9d 1543 return NULL;
1b57e9c2 1544}