[PATCH] x86_64: Reject SRAT tables that don't cover all memory
arch/x86_64/kernel/pci-gart.c (linux-2.6-block.git)
/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows PCI devices that only support 32bit addresses to be used on
 * systems with more than 4GB of memory.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>

dma_addr_t bad_dma_address;

unsigned long iommu_bus_base;		/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;			/* Remapping table */

int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow = 1;
int force_iommu = 1;
#else
int panic_on_overflow = 0;
int force_iommu = 0;
#endif
int iommu_merge = 1;
int iommu_sac_force = 0;

/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when a mapping is reused. With it true the GART is flushed
   for every mapping. Problem is that doing the lazy flush seems to trigger
   bugs with some popular PCI cards, in particular 3ware (but it has also
   been seen with Qlogic at least). */
int iommu_fullflush = 1;

/* This tells the BIO block layer to assume merging. Default to off
   because we cannot guarantee merging later. */
int iommu_bio_merge = 0;

#define MAX_NB 8

/* Allocation bitmap for the remapping area */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

static u32 gart_unmapped_entry;

#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))

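/*
 * A GART PTE is a single 32-bit word: bits 31:12 hold bits 31:12 of the
 * physical page address, bits 11:4 hold physical address bits 39:32 and
 * bits 1:0 are the coherent/valid flags.  Worked example (illustrative,
 * not from this file): GPTE_ENCODE(0x1234567000ULL) == 0x34567123, i.e.
 * 0x34567000 from the low bits, 0x120 from the high byte shifted into
 * bits 11:4, plus GPTE_VALID | GPTE_COHERENT.  GPTE_DECODE() undoes the
 * transformation.
 */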
#define to_pages(addr,size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)

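/*
 * to_pages() counts how many GART pages a mapping touches, including the
 * partial pages at either end.  For example (illustrative only), a
 * 0x2100-byte buffer starting at offset 0x800 into a page needs
 * round_up(0x800 + 0x2100, 0x1000) >> 12 = 3 pages.
 */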
#define for_all_nb(dev) \
	dev = NULL;	\
	while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\
		if (dev->bus->number == 0 && \
		    (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))

static struct pci_dev *northbridges[MAX_NB];
static u32 northbridge_flush_word[MAX_NB];

#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;

static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
static int need_flush;		/* global flush state. set for each gart wrap */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
			       size_t size, int dir, int do_panic);

/* Dummy device used for NULL arguments (normally ISA). A smaller DMA mask
   would probably be better, but this is bug-to-bug compatible with i386. */
static struct device fallback_dev = {
	.bus_id = "fallback device",
	.coherent_dma_mask = 0xffffffff,
	.dma_mask = &fallback_dev.coherent_dma_mask,
};

static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = find_next_zero_string(iommu_gart_bitmap, next_bit, iommu_pages, size);
	if (offset == -1) {
		need_flush = 1;
		offset = find_next_zero_string(iommu_gart_bitmap, 0, next_bit, size);
	}
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset + size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			need_flush = 1;
		}
	}
	if (iommu_fullflush)
		need_flush = 1;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}

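/*
 * Allocation is a simple next-fit search over the GART bitmap: the search
 * starts at next_bit so freshly freed entries are not reused immediately,
 * and only wraps to the beginning of the aperture (scheduling a TLB flush
 * first) when the tail is exhausted.  This is what lets the lazy-flush
 * mode skip the expensive northbridge flush for most mappings.
 */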
static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;
	if (size == 1) {
		clear_bit(offset, iommu_gart_bitmap);
		return;
	}
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	__clear_bit_string(iommu_gart_bitmap, offset, size);
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

/*
 * Use global flush state to avoid races with multiple flushers.
 */
static void flush_gart(struct device *dev)
{
	unsigned long flags;
	int flushed = 0;
	int i, max;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	if (need_flush) {
		max = 0;
		for (i = 0; i < MAX_NB; i++) {
			if (!northbridges[i])
				continue;
			pci_write_config_dword(northbridges[i], 0x9c,
					       northbridge_flush_word[i] | 1);
			flushed++;
			max = i;
		}
		for (i = 0; i <= max; i++) {
			u32 w;
			if (!northbridges[i])
				continue;
			/* Make sure the hardware actually executed the flush. */
			do {
				pci_read_config_dword(northbridges[i], 0x9c, &w);
			} while (w & 1);
		}
		if (!flushed)
			printk("nothing to flush?\n");
		need_flush = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}

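/*
 * The GART TLB is flushed by setting bit 0 of the flush-control word at
 * config space offset 0x9c in every K8 northbridge and then polling each
 * one until the hardware clears the bit again.  On a multi-node system
 * all northbridges are kicked first and waited on afterwards, so the
 * flushes run in parallel.
 */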
/* Allocate DMA memory on node near device */
noinline
static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
{
	struct page *page;
	int node;
	if (dev->bus == &pci_bus_type)
		node = pcibus_to_node(to_pci_dev(dev)->bus);
	else
		node = numa_node_id();
	page = alloc_pages_node(node, gfp, order);
	return page ? page_address(page) : NULL;
}

/*
 * Allocate memory for a coherent mapping.
 */
void *
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
		   gfp_t gfp)
{
	void *memory;
	unsigned long dma_mask = 0;
	u64 bus;

	if (!dev)
		dev = &fallback_dev;
	dma_mask = dev->coherent_dma_mask;
	if (dma_mask == 0)
		dma_mask = 0xffffffff;

	/* Kludge to make it bug-to-bug compatible with i386. i386
	   uses the normal dma_mask for alloc_coherent. */
	dma_mask &= *dev->dma_mask;

	/* Why <=? Even when the mask is smaller than 4GB it is often larger
	   than 16MB and in this case we have a chance of finding fitting memory
	   in the next higher zone first. If not retry with true GFP_DMA. -AK */
	if (dma_mask <= 0xffffffff)
		gfp |= GFP_DMA32;

 again:
	memory = dma_alloc_pages(dev, gfp, get_order(size));
	if (memory == NULL)
		return NULL;

	{
		int high, mmu;
		bus = virt_to_bus(memory);
		high = (bus + size) >= dma_mask;
		mmu = high;
		if (force_iommu && !(gfp & GFP_DMA))
			mmu = 1;
		if (no_iommu || dma_mask < 0xffffffffUL) {
			if (high) {
				free_pages((unsigned long)memory,
					   get_order(size));

				if (swiotlb) {
					return
					swiotlb_alloc_coherent(dev, size,
							       dma_handle,
							       gfp);
				}

				if (!(gfp & GFP_DMA)) {
					gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
					goto again;
				}
				return NULL;
			}
			mmu = 0;
		}
		memset(memory, 0, size);
		if (!mmu) {
			*dma_handle = virt_to_bus(memory);
			return memory;
		}
	}

	*dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0);
	if (*dma_handle == bad_dma_address)
		goto error;
	flush_gart(dev);
	return memory;

error:
	if (panic_on_overflow)
		panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size);
	free_pages((unsigned long)memory, get_order(size));
	return NULL;
}

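/*
 * Illustrative driver-side usage (not part of this file; "pdev",
 * "RING_BYTES" and "ring_bus" are hypothetical names):
 *
 *	void *ring = dma_alloc_coherent(&pdev->dev, RING_BYTES,
 *					&ring_bus, GFP_KERNEL);
 *
 * "ring" is the CPU virtual address of the zeroed buffer and "ring_bus"
 * the bus address to program into the device.
 */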
/*
 * Unmap coherent memory.
 * The caller must ensure that the device has finished accessing the mapping.
 */
void dma_free_coherent(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t bus)
{
	if (swiotlb) {
		swiotlb_free_coherent(dev, size, vaddr, bus);
		return;
	}

	dma_unmap_single(dev, bus, size, 0);
	free_pages((unsigned long)vaddr, get_order(size));
}

#ifdef CONFIG_IOMMU_LEAK

#define SET_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = __builtin_return_address(0);
#define CLEAR_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = NULL;

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;
void dump_leak(void)
{
	int i;
	static int dump;
	if (dump || !iommu_leak_tab) return;
	dump = 1;
	show_stack(NULL, NULL);
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i += 2) {
		printk("%lu: ", iommu_pages-i);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
		printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif

static void iommu_full(struct device *dev, size_t size, int dir, int do_panic)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non-mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	       "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
	       size, dev->bus_id);

	if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory would be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Random memory would be DMAed\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}

static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
{
	u64 mask = *dev->dma_mask;
	int high = addr + size >= mask;
	int mmu = high;
	if (force_iommu)
		mmu = 1;
	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}

static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
	u64 mask = *dev->dma_mask;
	int high = addr + size >= mask;
	int mmu = high;
	if (no_iommu) {
		if (high)
			panic("PCI-DMA: high address but no IOMMU.\n");
		mmu = 0;
	}
	return mmu;
}

/* Map a single contiguous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
			       size_t size, int dir, int do_panic)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long iommu_page = alloc_iommu(npages);
	int i;
	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir, do_panic);
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}

/* Map a single area into the IOMMU */
dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir)
{
	unsigned long phys_mem, bus;

	BUG_ON(dir == DMA_NONE);

	if (swiotlb)
		return swiotlb_map_single(dev, addr, size, dir);
	if (!dev)
		dev = &fallback_dev;

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	bus = dma_map_area(dev, phys_mem, size, dir, 1);
	flush_gart(dev);
	return bus;
}

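/*
 * Illustrative driver-side usage (names are hypothetical, not from this
 * file): a network driver mapping a transmit buffer would typically do
 *
 *	dma_addr_t bus = dma_map_single(&pdev->dev, skb->data, skb->len,
 *					PCI_DMA_TODEVICE);
 *	if (bus == bad_dma_address)
 *		goto drop;
 *
 * and hand "bus" to the hardware, calling dma_unmap_single() once the
 * transmit has completed.
 */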
/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	int i;

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif

	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long addr = page_to_phys(s->page) + s->offset;
		if (nonforced_iommu(dev, addr, s->length)) {
			addr = dma_map_area(dev, addr, s->length, dir, 0);
			if (addr == bad_dma_address) {
				if (i > 0)
					dma_unmap_sg(dev, sg, i, dir);
				nents = 0;
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
	flush_gart(dev);
	return nents;
}

/* Map multiple scatterlist entries contiguously into the first. */
static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
			  struct scatterlist *sout, unsigned long pages)
{
	unsigned long iommu_start = alloc_iommu(pages);
	unsigned long iommu_page = iommu_start;
	int i;

	if (iommu_start == -1)
		return -1;

	for (i = start; i < stopat; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long pages, addr;
		unsigned long phys_addr = s->dma_address;

		BUG_ON(i > start && s->offset);
		if (i == start) {
			*sout = *s;
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->dma_length = s->length;
		} else {
			sout->dma_length += s->length;
		}

		addr = phys_addr;
		pages = to_pages(s->offset, s->length);
		while (pages--) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
			SET_LEAK(iommu_page);
			addr += PAGE_SIZE;
			iommu_page++;
		}
	}
	BUG_ON(iommu_page - iommu_start != pages);
	return 0;
}

static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
			       struct scatterlist *sout,
			       unsigned long pages, int need)
{
	if (!need) {
		BUG_ON(stopat - start != 1);
		*sout = sg[start];
		sout->dma_length = sg[start].length;
		return 0;
	}
	return __dma_map_cont(sg, start, stopat, sout, pages);
}

/*
 * DMA map all entries in a scatterlist.
 * Merge chunks that have page-aligned sizes into a contiguous mapping.
 */
int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;
	int out;
	int start;
	unsigned long pages = 0;
	int need = 0, nextneed;

	BUG_ON(dir == DMA_NONE);
	if (nents == 0)
		return 0;

	if (swiotlb)
		return swiotlb_map_sg(dev, sg, nents, dir);
	if (!dev)
		dev = &fallback_dev;

	out = 0;
	start = 0;
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		dma_addr_t addr = page_to_phys(s->page) + s->offset;
		s->dma_address = addr;
		BUG_ON(s->length == 0);

		nextneed = need_iommu(dev, addr, s->length);

		/* Handle the previous, not yet processed entries */
		if (i > start) {
			struct scatterlist *ps = &sg[i-1];
			/* Can only merge when the last chunk ends on a page
			   boundary and the new one doesn't have an offset. */
			if (!iommu_merge || !nextneed || !need || s->offset ||
			    (ps->offset + ps->length) % PAGE_SIZE) {
				if (dma_map_cont(sg, start, i, sg+out, pages,
						 need) < 0)
					goto error;
				out++;
				pages = 0;
				start = i;
			}
		}

		need = nextneed;
		pages += to_pages(s->offset, s->length);
	}
	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
		goto error;
	out++;
	flush_gart(dev);
	if (out < nents)
		sg[out].dma_length = 0;
	return out;

error:
	flush_gart(NULL);
	dma_unmap_sg(dev, sg, nents, dir);
	/* When it was forced, try again unforced */
	if (force_iommu)
		return dma_map_sg_nonforce(dev, sg, nents, dir);
	if (panic_on_overflow)
		panic("dma_map_sg: overflow on %lu pages\n", pages);
	iommu_full(dev, pages << PAGE_SHIFT, dir, 0);
	for (i = 0; i < nents; i++)
		sg[i].dma_address = bad_dma_address;
	return 0;
}

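/*
 * Merging example (illustrative): three scatterlist entries of 4096, 4096
 * and 512 bytes, each starting at offset 0 of its page, collapse into a
 * single 3-page GART mapping described by sg[0] with dma_length 8704.
 * An entry with a non-zero offset, or one following an entry that does
 * not end on a page boundary, starts a new output segment instead.
 */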
/*
 * Free a DMA mapping.
 */
void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
		      size_t size, int direction)
{
	unsigned long iommu_page;
	int npages;
	int i;

	if (swiotlb) {
		swiotlb_unmap_single(dev, dma_addr, size, direction);
		return;
	}

	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = to_pages(dma_addr, size);
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
		CLEAR_LEAK(iommu_page + i);
	}
	free_iommu(iommu_page, npages);
}

/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;
	if (swiotlb) {
		swiotlb_unmap_sg(dev, sg, nents, dir);
		return;
	}
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		if (!s->dma_length || !s->length)
			break;
		dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
	}
}

int dma_supported(struct device *dev, u64 mask)
{
	/* Copied from i386. Doesn't make much sense, because it will
	   only work for pci_alloc_coherent.
	   The caller just has to use GFP_DMA in this case. */
	if (mask < 0x00ffffff)
		return 0;

	/* Tell the device to use SAC when IOMMU force is on.
	   This allows the driver to use cheaper accesses in some cases.

	   Problem with this is that if we overflow the IOMMU area
	   and return DAC as fallback address the device may not handle it correctly.

	   As a special case some controllers have a 39bit address mode
	   that is as efficient as 32bit (aic79xx). Don't force SAC for these.
	   Assume all masks <= 40 bits are of this type. Normally this doesn't
	   make any difference, but gives more gentle handling of IOMMU overflow. */
	if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
		printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id, mask);
		return 0;
	}

	return 1;
}

int dma_get_cache_alignment(void)
{
	return boot_cpu_data.x86_clflush_size;
}

EXPORT_SYMBOL(dma_unmap_sg);
EXPORT_SYMBOL(dma_map_sg);
EXPORT_SYMBOL(dma_map_single);
EXPORT_SYMBOL(dma_unmap_single);
EXPORT_SYMBOL(dma_supported);
EXPORT_SYMBOL(no_iommu);
EXPORT_SYMBOL(force_iommu);
EXPORT_SYMBOL(bad_dma_address);
EXPORT_SYMBOL(iommu_bio_merge);
EXPORT_SYMBOL(iommu_sac_force);
EXPORT_SYMBOL(dma_get_cache_alignment);
EXPORT_SYMBOL(dma_alloc_coherent);
EXPORT_SYMBOL(dma_free_coherent);

static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;
	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}

	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
		       "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n", iommu_size>>20);

	return iommu_size;
}

static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}

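/*
 * The K8 aperture registers pack the GART window tightly: bits 3:1 of
 * config offset 0x90 give the aperture size as 32MB << order, and the low
 * 15 bits of offset 0x94 hold bits 39:25 of the aperture base.  As an
 * illustration, an order of 2 with a base field of 0x40 describes a 128MB
 * aperture at physical address 0x80000000 (0x40 << 25).
 */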
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(struct agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
	aper_size = aper_base = info->aper_size = 0;
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size>>20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	agp_gatt_table = gatt;

	for_all_nb(dev) {
		u32 ctl;
		u32 gatt_reg;

		gatt_reg = __pa(gatt) >> 12;
		gatt_reg <<= 4;
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		ctl |= 1;
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}
	flush_gart(NULL);

	printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10);
	return 0;

 nommu:
	/* Should not happen anymore */
	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
	return -1;
}

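/*
 * Note on the register writes above: config offset 0x98 takes the GATT's
 * physical address with bits 39:12 shifted into bits 31:4, while in the
 * aperture control register at 0x90 bit 0 (the GART enable) is set and
 * bits 4 and 5 are cleared.
 */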
extern int agp_amd64_init(void);

static int __init pci_iommu_init(void)
{
	struct agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;
	struct pci_dev *dev;
	unsigned long scratch;
	long i;

#ifndef CONFIG_AGP_AMD64
	no_agp = 1;
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
	no_agp = no_agp ||
		(agp_amd64_init() < 0) ||
		(agp_copy_info(agp_bridge, &info) < 0);
#endif

	if (swiotlb) {
		no_iommu = 1;
		printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
		return -1;
	}

	if (no_iommu ||
	    (!force_iommu && (end_pfn-1) < 0xffffffff>>PAGE_SHIFT) ||
	    !iommu_aperture ||
	    (no_agp && init_k8_gatt(&info) < 0)) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		no_iommu = 1;
		return -1;
	}

	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void *)__get_free_pages(GFP_KERNEL,
						     get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				  get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * 8);
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size>>20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);

	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
	 * devices.
	 */
	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);

	/*
	 * Try to work around a bug (thanks to BenH):
	 * Set unmapped entries to a scratch page instead of 0.
	 * Any prefetches that hit unmapped entries won't get a bus abort
	 * then.
	 */
	scratch = get_zeroed_page(GFP_KERNEL);
	if (!scratch)
		panic("Cannot allocate iommu scratch page");
	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
		iommu_gatt_base[i] = gart_unmapped_entry;

	for_all_nb(dev) {
		u32 flag;
		int cpu = PCI_SLOT(dev->devfn) - 24;
		if (cpu >= MAX_NB)
			continue;
		northbridges[cpu] = dev;
		pci_read_config_dword(dev, 0x9c, &flag);	/* cache flush word */
		northbridge_flush_word[cpu] = flag;
	}

	flush_gart(NULL);

	return 0;
}

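/*
 * Resulting layout (illustrative, assuming a 128MB aperture and the
 * default iommu_size of half the aperture): the lower 64MB remain
 * available to AGP, the upper 64MB become the IOMMU remapping window
 * starting at iommu_bus_base, and its first EMERGENCY_PAGES (128KB) stay
 * reserved and double as bad_dma_address.
 */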
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);

/* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
	 [,forcesac][,fullflush][,nomerge][,biomerge]
   size		set size of IOMMU (in bytes)
   noagp	don't initialize the AGP driver and use full aperture.
   off		don't use the IOMMU
   leak		turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
   memaper[=order] allocate an own aperture over RAM with size 32MB^order.
   noforce	don't force IOMMU usage. Default.
   force	Force IOMMU.
   merge	Do lazy merging. This may improve performance on some block devices.
		Implies force (experimental)
   biomerge	Do merging at the BIO layer. This is more efficient than merge,
		but should only be done with very big IOMMUs. Implies merge,force.
   nomerge	Don't do SG merging.
   forcesac	Force SAC mode for masks <40bits (experimental)
   fullflush	Flush IOMMU on each allocation (default)
   nofullflush	Don't use IOMMU fullflush
   allowed	Overwrite iommu-off workarounds for specific chipsets.
   soft		Use software bounce buffering (default for Intel machines)
   noaperture	Don't touch the aperture for AGP.
*/
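/*
 * Example (illustrative): booting with "iommu=force,fullflush,memaper=2"
 * forces the GART IOMMU on even with less than 4GB of memory, flushes the
 * GART on every allocation and allocates an own aperture over RAM of
 * order 2.
 */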
__init int iommu_setup(char *p)
{
	int arg;

	while (*p) {
		if (!strncmp(p, "noagp", 5))
			no_agp = 1;
		if (!strncmp(p, "off", 3))
			no_iommu = 1;
		if (!strncmp(p, "force", 5)) {
			force_iommu = 1;
			iommu_aperture_allowed = 1;
		}
		if (!strncmp(p, "allowed", 7))
			iommu_aperture_allowed = 1;
		if (!strncmp(p, "noforce", 7)) {
			iommu_merge = 0;
			force_iommu = 0;
		}
		if (!strncmp(p, "memaper", 7)) {
			fallback_aper_force = 1;
			p += 7;
			if (*p == '=') {
				++p;
				if (get_option(&p, &arg))
					fallback_aper_order = arg;
			}
		}
		if (!strncmp(p, "biomerge", 8)) {
			iommu_bio_merge = 4096;
			iommu_merge = 1;
			force_iommu = 1;
		}
		if (!strncmp(p, "panic", 5))
			panic_on_overflow = 1;
		if (!strncmp(p, "nopanic", 7))
			panic_on_overflow = 0;
		if (!strncmp(p, "merge", 5)) {
			iommu_merge = 1;
			force_iommu = 1;
		}
		if (!strncmp(p, "nomerge", 7))
			iommu_merge = 0;
		if (!strncmp(p, "forcesac", 8))
			iommu_sac_force = 1;
		if (!strncmp(p, "fullflush", 8))
			iommu_fullflush = 1;
		if (!strncmp(p, "nofullflush", 11))
			iommu_fullflush = 0;
		if (!strncmp(p, "soft", 4))
			swiotlb = 1;
		if (!strncmp(p, "noaperture", 10))
			fix_aperture = 0;
#ifdef CONFIG_IOMMU_LEAK
		if (!strncmp(p, "leak", 4)) {
			leak_trace = 1;
			p += 4;
			if (*p == '=') ++p;
			if (isdigit(*p) && get_option(&p, &arg))
				iommu_leak_pages = arg;
		} else
#endif
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_size = arg;
		p += strcspn(p, ",");
		if (*p == ',')
			++p;
	}
	return 1;
}