Merge tag 'powerpc-6.10-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
[linux-2.6-block.git] / drivers / vfio / pci / vfio_pci_rdwr.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19
20 #include "vfio_pci_priv.h"
21
/*
 * Accessor aliases used by the helpers in this file.
 *
 * When __LITTLE_ENDIAN is defined the plain ioread/iowrite helpers are used
 * for 16/32/64-bit accesses; otherwise the "be" variants are selected.
 * Single-byte accesses always map to the plain 8-bit helpers.
 */
#ifndef __LITTLE_ENDIAN
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#else
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8
39
40 #define VFIO_IOWRITE(size) \
41 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,      \
42                         bool test_mem, u##size val, void __iomem *io)   \
43 {                                                                       \
44         if (test_mem) {                                                 \
45                 down_read(&vdev->memory_lock);                          \
46                 if (!__vfio_pci_memory_enabled(vdev)) {                 \
47                         up_read(&vdev->memory_lock);                    \
48                         return -EIO;                                    \
49                 }                                                       \
50         }                                                               \
51                                                                         \
52         vfio_iowrite##size(val, io);                                    \
53                                                                         \
54         if (test_mem)                                                   \
55                 up_read(&vdev->memory_lock);                            \
56                                                                         \
57         return 0;                                                       \
58 }                                                                       \
59 EXPORT_SYMBOL_GPL(vfio_pci_core_iowrite##size);
60
61 VFIO_IOWRITE(8)
62 VFIO_IOWRITE(16)
63 VFIO_IOWRITE(32)
64 #ifdef iowrite64
65 VFIO_IOWRITE(64)
66 #endif
67
68 #define VFIO_IOREAD(size) \
69 int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev,       \
70                         bool test_mem, u##size *val, void __iomem *io)  \
71 {                                                                       \
72         if (test_mem) {                                                 \
73                 down_read(&vdev->memory_lock);                          \
74                 if (!__vfio_pci_memory_enabled(vdev)) {                 \
75                         up_read(&vdev->memory_lock);                    \
76                         return -EIO;                                    \
77                 }                                                       \
78         }                                                               \
79                                                                         \
80         *val = vfio_ioread##size(io);                                   \
81                                                                         \
82         if (test_mem)                                                   \
83                 up_read(&vdev->memory_lock);                            \
84                                                                         \
85         return 0;                                                       \
86 }                                                                       \
87 EXPORT_SYMBOL_GPL(vfio_pci_core_ioread##size);
88
89 VFIO_IOREAD(8)
90 VFIO_IOREAD(16)
91 VFIO_IOREAD(32)
92
93 /*
94  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
95  * range which is inaccessible.  The excluded range drops writes and fills
96  * reads with -1.  This is intended for handling MSI-X vector tables and
97  * leftover space for ROM BARs.
98  */
99 ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
100                                void __iomem *io, char __user *buf,
101                                loff_t off, size_t count, size_t x_start,
102                                size_t x_end, bool iswrite)
103 {
104         ssize_t done = 0;
105         int ret;
106
107         while (count) {
108                 size_t fillable, filled;
109
110                 if (off < x_start)
111                         fillable = min(count, (size_t)(x_start - off));
112                 else if (off >= x_end)
113                         fillable = count;
114                 else
115                         fillable = 0;
116
117                 if (fillable >= 4 && !(off % 4)) {
118                         u32 val;
119
120                         if (iswrite) {
121                                 if (copy_from_user(&val, buf, 4))
122                                         return -EFAULT;
123
124                                 ret = vfio_pci_core_iowrite32(vdev, test_mem,
125                                                               val, io + off);
126                                 if (ret)
127                                         return ret;
128                         } else {
129                                 ret = vfio_pci_core_ioread32(vdev, test_mem,
130                                                              &val, io + off);
131                                 if (ret)
132                                         return ret;
133
134                                 if (copy_to_user(buf, &val, 4))
135                                         return -EFAULT;
136                         }
137
138                         filled = 4;
139                 } else if (fillable >= 2 && !(off % 2)) {
140                         u16 val;
141
142                         if (iswrite) {
143                                 if (copy_from_user(&val, buf, 2))
144                                         return -EFAULT;
145
146                                 ret = vfio_pci_core_iowrite16(vdev, test_mem,
147                                                               val, io + off);
148                                 if (ret)
149                                         return ret;
150                         } else {
151                                 ret = vfio_pci_core_ioread16(vdev, test_mem,
152                                                              &val, io + off);
153                                 if (ret)
154                                         return ret;
155
156                                 if (copy_to_user(buf, &val, 2))
157                                         return -EFAULT;
158                         }
159
160                         filled = 2;
161                 } else if (fillable) {
162                         u8 val;
163
164                         if (iswrite) {
165                                 if (copy_from_user(&val, buf, 1))
166                                         return -EFAULT;
167
168                                 ret = vfio_pci_core_iowrite8(vdev, test_mem,
169                                                              val, io + off);
170                                 if (ret)
171                                         return ret;
172                         } else {
173                                 ret = vfio_pci_core_ioread8(vdev, test_mem,
174                                                             &val, io + off);
175                                 if (ret)
176                                         return ret;
177
178                                 if (copy_to_user(buf, &val, 1))
179                                         return -EFAULT;
180                         }
181
182                         filled = 1;
183                 } else {
184                         /* Fill reads with -1, drop writes */
185                         filled = min(count, (size_t)(x_end - off));
186                         if (!iswrite) {
187                                 u8 val = 0xFF;
188                                 size_t i;
189
190                                 for (i = 0; i < filled; i++)
191                                         if (copy_to_user(buf + i, &val, 1))
192                                                 return -EFAULT;
193                         }
194                 }
195
196                 count -= filled;
197                 done += filled;
198                 off += filled;
199                 buf += filled;
200         }
201
202         return done;
203 }
204 EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
205
206 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
207 {
208         struct pci_dev *pdev = vdev->pdev;
209         int ret;
210         void __iomem *io;
211
212         if (vdev->barmap[bar])
213                 return 0;
214
215         ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
216         if (ret)
217                 return ret;
218
219         io = pci_iomap(pdev, bar, 0);
220         if (!io) {
221                 pci_release_selected_regions(pdev, 1 << bar);
222                 return -ENOMEM;
223         }
224
225         vdev->barmap[bar] = io;
226
227         return 0;
228 }
229 EXPORT_SYMBOL_GPL(vfio_pci_core_setup_barmap);
230
231 ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
232                         size_t count, loff_t *ppos, bool iswrite)
233 {
234         struct pci_dev *pdev = vdev->pdev;
235         loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
236         int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
237         size_t x_start = 0, x_end = 0;
238         resource_size_t end;
239         void __iomem *io;
240         struct resource *res = &vdev->pdev->resource[bar];
241         ssize_t done;
242
243         if (pci_resource_start(pdev, bar))
244                 end = pci_resource_len(pdev, bar);
245         else if (bar == PCI_ROM_RESOURCE &&
246                  pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
247                 end = 0x20000;
248         else
249                 return -EINVAL;
250
251         if (pos >= end)
252                 return -EINVAL;
253
254         count = min(count, (size_t)(end - pos));
255
256         if (bar == PCI_ROM_RESOURCE) {
257                 /*
258                  * The ROM can fill less space than the BAR, so we start the
259                  * excluded range at the end of the actual ROM.  This makes
260                  * filling large ROM BARs much faster.
261                  */
262                 io = pci_map_rom(pdev, &x_start);
263                 if (!io) {
264                         done = -ENOMEM;
265                         goto out;
266                 }
267                 x_end = end;
268         } else {
269                 int ret = vfio_pci_core_setup_barmap(vdev, bar);
270                 if (ret) {
271                         done = ret;
272                         goto out;
273                 }
274
275                 io = vdev->barmap[bar];
276         }
277
278         if (bar == vdev->msix_bar) {
279                 x_start = vdev->msix_offset;
280                 x_end = vdev->msix_offset + vdev->msix_size;
281         }
282
283         done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
284                                       count, x_start, x_end, iswrite);
285
286         if (done >= 0)
287                 *ppos += done;
288
289         if (bar == PCI_ROM_RESOURCE)
290                 pci_unmap_rom(pdev, io);
291 out:
292         return done;
293 }
294
#ifdef CONFIG_VFIO_PCI_VGA
/* Undo the temporary mapping created by vfio_pci_vga_rw(). */
static void vfio_pci_vga_unmap(void __iomem *iomem, bool is_ioport)
{
	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);
}

/*
 * Read from or write to the legacy VGA ranges on behalf of user space.
 *
 * The offset taken from *@ppos must fall into one of three windows: the
 * memory window 0xa0000-0xbffff, or the I/O port windows 0x3b0-0x3bb and
 * 0x3c0-0x3df.  @count is clamped to the end of the selected window.  The
 * window is mapped temporarily and the matching VGA resource is acquired
 * with vga_get_interruptible() for the duration of the access.
 *
 * Returns the number of bytes processed (advancing *@ppos), -EINVAL if the
 * device has no VGA or the offset is outside the windows, -ENOMEM if the
 * mapping fails, or the error from vga_get_interruptible() or the transfer.
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			       size_t count, loff_t *ppos, bool iswrite)
{
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		is_ioport = false;
		rsrc = VGA_RSRC_LEGACY_MEM;
		off = pos - 0xa0000;
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		break;
	case 0x3b0 ... 0x3bb:
		is_ioport = true;
		rsrc = VGA_RSRC_LEGACY_IO;
		off = pos - 0x3b0;
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		break;
	case 0x3c0 ... 0x3df:
		is_ioport = true;
		rsrc = VGA_RSRC_LEGACY_IO;
		off = pos - 0x3c0;
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		vfio_pci_vga_unmap(iomem, is_ioport);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
				      0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	vfio_pci_vga_unmap(iomem, is_ioport);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
365
366 static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
367                                         bool test_mem)
368 {
369         switch (ioeventfd->count) {
370         case 1:
371                 vfio_pci_core_iowrite8(ioeventfd->vdev, test_mem,
372                                        ioeventfd->data, ioeventfd->addr);
373                 break;
374         case 2:
375                 vfio_pci_core_iowrite16(ioeventfd->vdev, test_mem,
376                                         ioeventfd->data, ioeventfd->addr);
377                 break;
378         case 4:
379                 vfio_pci_core_iowrite32(ioeventfd->vdev, test_mem,
380                                         ioeventfd->data, ioeventfd->addr);
381                 break;
382 #ifdef iowrite64
383         case 8:
384                 vfio_pci_core_iowrite64(ioeventfd->vdev, test_mem,
385                                         ioeventfd->data, ioeventfd->addr);
386                 break;
387 #endif
388         }
389 }
390
391 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
392 {
393         struct vfio_pci_ioeventfd *ioeventfd = opaque;
394         struct vfio_pci_core_device *vdev = ioeventfd->vdev;
395
396         if (ioeventfd->test_mem) {
397                 if (!down_read_trylock(&vdev->memory_lock))
398                         return 1; /* Lock contended, use thread */
399                 if (!__vfio_pci_memory_enabled(vdev)) {
400                         up_read(&vdev->memory_lock);
401                         return 0;
402                 }
403         }
404
405         vfio_pci_ioeventfd_do_write(ioeventfd, false);
406
407         if (ioeventfd->test_mem)
408                 up_read(&vdev->memory_lock);
409
410         return 0;
411 }
412
413 static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
414 {
415         struct vfio_pci_ioeventfd *ioeventfd = opaque;
416
417         vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
418 }
419
420 int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
421                        uint64_t data, int count, int fd)
422 {
423         struct pci_dev *pdev = vdev->pdev;
424         loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
425         int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
426         struct vfio_pci_ioeventfd *ioeventfd;
427
428         /* Only support ioeventfds into BARs */
429         if (bar > VFIO_PCI_BAR5_REGION_INDEX)
430                 return -EINVAL;
431
432         if (pos + count > pci_resource_len(pdev, bar))
433                 return -EINVAL;
434
435         /* Disallow ioeventfds working around MSI-X table writes */
436         if (bar == vdev->msix_bar &&
437             !(pos + count <= vdev->msix_offset ||
438               pos >= vdev->msix_offset + vdev->msix_size))
439                 return -EINVAL;
440
441 #ifndef iowrite64
442         if (count == 8)
443                 return -EINVAL;
444 #endif
445
446         ret = vfio_pci_core_setup_barmap(vdev, bar);
447         if (ret)
448                 return ret;
449
450         mutex_lock(&vdev->ioeventfds_lock);
451
452         list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
453                 if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
454                     ioeventfd->data == data && ioeventfd->count == count) {
455                         if (fd == -1) {
456                                 vfio_virqfd_disable(&ioeventfd->virqfd);
457                                 list_del(&ioeventfd->next);
458                                 vdev->ioeventfds_nr--;
459                                 kfree(ioeventfd);
460                                 ret = 0;
461                         } else
462                                 ret = -EEXIST;
463
464                         goto out_unlock;
465                 }
466         }
467
468         if (fd < 0) {
469                 ret = -ENODEV;
470                 goto out_unlock;
471         }
472
473         if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
474                 ret = -ENOSPC;
475                 goto out_unlock;
476         }
477
478         ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
479         if (!ioeventfd) {
480                 ret = -ENOMEM;
481                 goto out_unlock;
482         }
483
484         ioeventfd->vdev = vdev;
485         ioeventfd->addr = vdev->barmap[bar] + pos;
486         ioeventfd->data = data;
487         ioeventfd->pos = pos;
488         ioeventfd->bar = bar;
489         ioeventfd->count = count;
490         ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
491
492         ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
493                                  vfio_pci_ioeventfd_thread, NULL,
494                                  &ioeventfd->virqfd, fd);
495         if (ret) {
496                 kfree(ioeventfd);
497                 goto out_unlock;
498         }
499
500         list_add(&ioeventfd->next, &vdev->ioeventfds_list);
501         vdev->ioeventfds_nr++;
502
503 out_unlock:
504         mutex_unlock(&vdev->ioeventfds_lock);
505
506         return ret;
507 }