| 1 | /* |
| 2 | * Firmware replacement code. |
| 3 | * |
| 4 | * Work around broken BIOSes that don't set an aperture, only set the |
| 5 | * aperture in the AGP bridge, or set too small aperture. |
| 6 | * |
| 7 | * If all fails map the aperture over some low memory. This is cheaper than |
| 8 | * doing bounce buffering. The memory is lost. This is done at early boot |
| 9 | * because only the bootmem allocator can allocate 32+MB. |
| 10 | * |
| 11 | * Copyright 2002 Andi Kleen, SuSE Labs. |
| 12 | */ |
| 13 | #include <linux/kernel.h> |
| 14 | #include <linux/types.h> |
| 15 | #include <linux/init.h> |
| 16 | #include <linux/bootmem.h> |
| 17 | #include <linux/mmzone.h> |
| 18 | #include <linux/pci_ids.h> |
| 19 | #include <linux/pci.h> |
| 20 | #include <linux/bitops.h> |
| 21 | #include <linux/ioport.h> |
| 22 | #include <linux/suspend.h> |
| 23 | #include <linux/kmemleak.h> |
| 24 | #include <asm/e820.h> |
| 25 | #include <asm/io.h> |
| 26 | #include <asm/iommu.h> |
| 27 | #include <asm/gart.h> |
| 28 | #include <asm/pci-direct.h> |
| 29 | #include <asm/dma.h> |
| 30 | #include <asm/k8.h> |
| 31 | #include <asm/x86_init.h> |
| 32 | |
/*
 * Set to 1 by gart_iommu_hole_init() as soon as an AMD K8 northbridge is
 * found during the early PCI scan.
 */
int gart_iommu_aperture;
/* When non-zero, gart_iommu_hole_init() returns without doing anything. */
int gart_iommu_aperture_disabled __initdata;
/*
 * Not referenced in the code visible here.
 * NOTE(review): presumably set by option parsing elsewhere - confirm.
 */
int gart_iommu_aperture_allowed __initdata;

/*
 * Order of the aperture carved out of RAM when no usable one exists:
 * size = 32MB << order. allocate_aperture() caps the order at 5 (1GB).
 */
int fallback_aper_order __initdata = 1; /* 64MB */
/*
 * When non-zero, gart_iommu_hole_init() skips the AGP bridge lookup and
 * always allocates a fresh aperture of fallback_aper_order from RAM.
 */
int fallback_aper_force __initdata;

/* When zero, gart_iommu_hole_init() returns without doing anything. */
int fix_aperture __initdata = 1;
| 41 | |
| 42 | struct bus_dev_range { |
| 43 | int bus; |
| 44 | int dev_base; |
| 45 | int dev_limit; |
| 46 | }; |
| 47 | |
| 48 | static struct bus_dev_range bus_dev_ranges[] __initdata = { |
| 49 | { 0x00, 0x18, 0x20}, |
| 50 | { 0xff, 0x00, 0x20}, |
| 51 | { 0xfe, 0x00, 0x20} |
| 52 | }; |
| 53 | |
/*
 * Memory resource describing the GART aperture. The start/end fields are
 * filled in by insert_aperture_resource() before it is inserted into
 * iomem_resource.
 */
static struct resource gart_resource = {
	.name = "GART",
	.flags = IORESOURCE_MEM,
};
| 58 | |
| 59 | static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) |
| 60 | { |
| 61 | gart_resource.start = aper_base; |
| 62 | gart_resource.end = aper_base + aper_size - 1; |
| 63 | insert_resource(&iomem_resource, &gart_resource); |
| 64 | } |
| 65 | |
| 66 | /* This code runs before the PCI subsystem is initialized, so just |
| 67 | access the northbridge directly. */ |
| 68 | |
| 69 | static u32 __init allocate_aperture(void) |
| 70 | { |
| 71 | u32 aper_size; |
| 72 | void *p; |
| 73 | |
| 74 | /* aper_size should <= 1G */ |
| 75 | if (fallback_aper_order > 5) |
| 76 | fallback_aper_order = 5; |
| 77 | aper_size = (32 * 1024 * 1024) << fallback_aper_order; |
| 78 | |
| 79 | /* |
| 80 | * Aperture has to be naturally aligned. This means a 2GB aperture |
| 81 | * won't have much chance of finding a place in the lower 4GB of |
| 82 | * memory. Unfortunately we cannot move it up because that would |
| 83 | * make the IOMMU useless. |
| 84 | */ |
| 85 | /* |
| 86 | * using 512M as goal, in case kexec will load kernel_big |
| 87 | * that will do the on position decompress, and could overlap with |
| 88 | * that positon with gart that is used. |
| 89 | * sequende: |
| 90 | * kernel_small |
| 91 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) |
| 92 | * ==> kernel_small(gart area become e820_reserved) |
| 93 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) |
| 94 | * ==> kerne_big (uncompressed size will be big than 64M or 128M) |
| 95 | * so don't use 512M below as gart iommu, leave the space for kernel |
| 96 | * code for safe |
| 97 | */ |
| 98 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); |
| 99 | /* |
| 100 | * Kmemleak should not scan this block as it may not be mapped via the |
| 101 | * kernel direct mapping. |
| 102 | */ |
| 103 | kmemleak_ignore(p); |
| 104 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
| 105 | printk(KERN_ERR |
| 106 | "Cannot allocate aperture memory hole (%p,%uK)\n", |
| 107 | p, aper_size>>10); |
| 108 | if (p) |
| 109 | free_bootmem(__pa(p), aper_size); |
| 110 | return 0; |
| 111 | } |
| 112 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", |
| 113 | aper_size >> 10, __pa(p)); |
| 114 | insert_aperture_resource((u32)__pa(p), aper_size); |
| 115 | register_nosave_region((u32)__pa(p) >> PAGE_SHIFT, |
| 116 | (u32)__pa(p+aper_size) >> PAGE_SHIFT); |
| 117 | |
| 118 | return (u32)__pa(p); |
| 119 | } |
| 120 | |
| 121 | |
| 122 | /* Find a PCI capability */ |
| 123 | static u32 __init find_cap(int bus, int slot, int func, int cap) |
| 124 | { |
| 125 | int bytes; |
| 126 | u8 pos; |
| 127 | |
| 128 | if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) & |
| 129 | PCI_STATUS_CAP_LIST)) |
| 130 | return 0; |
| 131 | |
| 132 | pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST); |
| 133 | for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { |
| 134 | u8 id; |
| 135 | |
| 136 | pos &= ~3; |
| 137 | id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); |
| 138 | if (id == 0xff) |
| 139 | break; |
| 140 | if (id == cap) |
| 141 | return pos; |
| 142 | pos = read_pci_config_byte(bus, slot, func, |
| 143 | pos+PCI_CAP_LIST_NEXT); |
| 144 | } |
| 145 | return 0; |
| 146 | } |
| 147 | |
| 148 | /* Read a standard AGPv3 bridge header */ |
| 149 | static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) |
| 150 | { |
| 151 | u32 apsize; |
| 152 | u32 apsizereg; |
| 153 | int nbits; |
| 154 | u32 aper_low, aper_hi; |
| 155 | u64 aper; |
| 156 | u32 old_order; |
| 157 | |
| 158 | printk(KERN_INFO "AGP bridge at %02x:%02x:%02x\n", bus, slot, func); |
| 159 | apsizereg = read_pci_config_16(bus, slot, func, cap + 0x14); |
| 160 | if (apsizereg == 0xffffffff) { |
| 161 | printk(KERN_ERR "APSIZE in AGP bridge unreadable\n"); |
| 162 | return 0; |
| 163 | } |
| 164 | |
| 165 | /* old_order could be the value from NB gart setting */ |
| 166 | old_order = *order; |
| 167 | |
| 168 | apsize = apsizereg & 0xfff; |
| 169 | /* Some BIOS use weird encodings not in the AGPv3 table. */ |
| 170 | if (apsize & 0xff) |
| 171 | apsize |= 0xf00; |
| 172 | nbits = hweight16(apsize); |
| 173 | *order = 7 - nbits; |
| 174 | if ((int)*order < 0) /* < 32MB */ |
| 175 | *order = 0; |
| 176 | |
| 177 | aper_low = read_pci_config(bus, slot, func, 0x10); |
| 178 | aper_hi = read_pci_config(bus, slot, func, 0x14); |
| 179 | aper = (aper_low & ~((1<<22)-1)) | ((u64)aper_hi << 32); |
| 180 | |
| 181 | /* |
| 182 | * On some sick chips, APSIZE is 0. It means it wants 4G |
| 183 | * so let double check that order, and lets trust AMD NB settings: |
| 184 | */ |
| 185 | printk(KERN_INFO "Aperture from AGP @ %Lx old size %u MB\n", |
| 186 | aper, 32 << old_order); |
| 187 | if (aper + (32ULL<<(20 + *order)) > 0x100000000ULL) { |
| 188 | printk(KERN_INFO "Aperture size %u MB (APSIZE %x) is not right, using settings from NB\n", |
| 189 | 32 << *order, apsizereg); |
| 190 | *order = old_order; |
| 191 | } |
| 192 | |
| 193 | printk(KERN_INFO "Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", |
| 194 | aper, 32 << *order, apsizereg); |
| 195 | |
| 196 | if (!aperture_valid(aper, (32*1024*1024) << *order, 32<<20)) |
| 197 | return 0; |
| 198 | return (u32)aper; |
| 199 | } |
| 200 | |
| 201 | /* |
| 202 | * Look for an AGP bridge. Windows only expects the aperture in the |
| 203 | * AGP bridge and some BIOS forget to initialize the Northbridge too. |
| 204 | * Work around this here. |
| 205 | * |
| 206 | * Do an PCI bus scan by hand because we're running before the PCI |
| 207 | * subsystem. |
| 208 | * |
| 209 | * All K8 AGP bridges are AGPv3 compliant, so we can do this scan |
| 210 | * generically. It's probably overkill to always scan all slots because |
| 211 | * the AGP bridges should be always an own bus on the HT hierarchy, |
| 212 | * but do it here for future safety. |
| 213 | */ |
| 214 | static u32 __init search_agp_bridge(u32 *order, int *valid_agp) |
| 215 | { |
| 216 | int bus, slot, func; |
| 217 | |
| 218 | /* Poor man's PCI discovery */ |
| 219 | for (bus = 0; bus < 256; bus++) { |
| 220 | for (slot = 0; slot < 32; slot++) { |
| 221 | for (func = 0; func < 8; func++) { |
| 222 | u32 class, cap; |
| 223 | u8 type; |
| 224 | class = read_pci_config(bus, slot, func, |
| 225 | PCI_CLASS_REVISION); |
| 226 | if (class == 0xffffffff) |
| 227 | break; |
| 228 | |
| 229 | switch (class >> 16) { |
| 230 | case PCI_CLASS_BRIDGE_HOST: |
| 231 | case PCI_CLASS_BRIDGE_OTHER: /* needed? */ |
| 232 | /* AGP bridge? */ |
| 233 | cap = find_cap(bus, slot, func, |
| 234 | PCI_CAP_ID_AGP); |
| 235 | if (!cap) |
| 236 | break; |
| 237 | *valid_agp = 1; |
| 238 | return read_agp(bus, slot, func, cap, |
| 239 | order); |
| 240 | } |
| 241 | |
| 242 | /* No multi-function device? */ |
| 243 | type = read_pci_config_byte(bus, slot, func, |
| 244 | PCI_HEADER_TYPE); |
| 245 | if (!(type & 0x80)) |
| 246 | break; |
| 247 | } |
| 248 | } |
| 249 | } |
| 250 | printk(KERN_INFO "No AGP bridge found\n"); |
| 251 | |
| 252 | return 0; |
| 253 | } |
| 254 | |
| 255 | static int gart_fix_e820 __initdata = 1; |
| 256 | |
| 257 | static int __init parse_gart_mem(char *p) |
| 258 | { |
| 259 | if (!p) |
| 260 | return -EINVAL; |
| 261 | |
| 262 | if (!strncmp(p, "off", 3)) |
| 263 | gart_fix_e820 = 0; |
| 264 | else if (!strncmp(p, "on", 2)) |
| 265 | gart_fix_e820 = 1; |
| 266 | |
| 267 | return 0; |
| 268 | } |
| 269 | early_param("gart_fix_e820", parse_gart_mem); |
| 270 | |
| 271 | void __init early_gart_iommu_check(void) |
| 272 | { |
| 273 | /* |
| 274 | * in case it is enabled before, esp for kexec/kdump, |
| 275 | * previous kernel already enable that. memset called |
| 276 | * by allocate_aperture/__alloc_bootmem_nopanic cause restart. |
| 277 | * or second kernel have different position for GART hole. and new |
| 278 | * kernel could use hole as RAM that is still used by GART set by |
| 279 | * first kernel |
| 280 | * or BIOS forget to put that in reserved. |
| 281 | * try to update e820 to make that region as reserved. |
| 282 | */ |
| 283 | u32 agp_aper_base = 0, agp_aper_order = 0; |
| 284 | int i, fix, slot, valid_agp = 0; |
| 285 | u32 ctl; |
| 286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; |
| 287 | u64 aper_base = 0, last_aper_base = 0; |
| 288 | int aper_enabled = 0, last_aper_enabled = 0, last_valid = 0; |
| 289 | |
| 290 | if (!early_pci_allowed()) |
| 291 | return; |
| 292 | |
| 293 | /* This is mostly duplicate of iommu_hole_init */ |
| 294 | agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); |
| 295 | |
| 296 | fix = 0; |
| 297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
| 298 | int bus; |
| 299 | int dev_base, dev_limit; |
| 300 | |
| 301 | bus = bus_dev_ranges[i].bus; |
| 302 | dev_base = bus_dev_ranges[i].dev_base; |
| 303 | dev_limit = bus_dev_ranges[i].dev_limit; |
| 304 | |
| 305 | for (slot = dev_base; slot < dev_limit; slot++) { |
| 306 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) |
| 307 | continue; |
| 308 | |
| 309 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
| 310 | aper_enabled = ctl & AMD64_GARTEN; |
| 311 | aper_order = (ctl >> 1) & 7; |
| 312 | aper_size = (32 * 1024 * 1024) << aper_order; |
| 313 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; |
| 314 | aper_base <<= 25; |
| 315 | |
| 316 | if (last_valid) { |
| 317 | if ((aper_order != last_aper_order) || |
| 318 | (aper_base != last_aper_base) || |
| 319 | (aper_enabled != last_aper_enabled)) { |
| 320 | fix = 1; |
| 321 | break; |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | last_aper_order = aper_order; |
| 326 | last_aper_base = aper_base; |
| 327 | last_aper_enabled = aper_enabled; |
| 328 | last_valid = 1; |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | if (!fix && !aper_enabled) |
| 333 | return; |
| 334 | |
| 335 | if (!aper_base || !aper_size || aper_base + aper_size > 0x100000000UL) |
| 336 | fix = 1; |
| 337 | |
| 338 | if (gart_fix_e820 && !fix && aper_enabled) { |
| 339 | if (e820_any_mapped(aper_base, aper_base + aper_size, |
| 340 | E820_RAM)) { |
| 341 | /* reserve it, so we can reuse it in second kernel */ |
| 342 | printk(KERN_INFO "update e820 for GART\n"); |
| 343 | e820_add_region(aper_base, aper_size, E820_RESERVED); |
| 344 | update_e820(); |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | if (valid_agp) |
| 349 | return; |
| 350 | |
| 351 | /* disable them all at first */ |
| 352 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
| 353 | int bus; |
| 354 | int dev_base, dev_limit; |
| 355 | |
| 356 | bus = bus_dev_ranges[i].bus; |
| 357 | dev_base = bus_dev_ranges[i].dev_base; |
| 358 | dev_limit = bus_dev_ranges[i].dev_limit; |
| 359 | |
| 360 | for (slot = dev_base; slot < dev_limit; slot++) { |
| 361 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) |
| 362 | continue; |
| 363 | |
| 364 | ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); |
| 365 | ctl &= ~AMD64_GARTEN; |
| 366 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | } |
| 371 | |
| 372 | static int __initdata printed_gart_size_msg; |
| 373 | |
| 374 | void __init gart_iommu_hole_init(void) |
| 375 | { |
| 376 | u32 agp_aper_base = 0, agp_aper_order = 0; |
| 377 | u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; |
| 378 | u64 aper_base, last_aper_base = 0; |
| 379 | int fix, slot, valid_agp = 0; |
| 380 | int i, node; |
| 381 | |
| 382 | if (gart_iommu_aperture_disabled || !fix_aperture || |
| 383 | !early_pci_allowed()) |
| 384 | return; |
| 385 | |
| 386 | printk(KERN_INFO "Checking aperture...\n"); |
| 387 | |
| 388 | if (!fallback_aper_force) |
| 389 | agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); |
| 390 | |
| 391 | fix = 0; |
| 392 | node = 0; |
| 393 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
| 394 | int bus; |
| 395 | int dev_base, dev_limit; |
| 396 | u32 ctl; |
| 397 | |
| 398 | bus = bus_dev_ranges[i].bus; |
| 399 | dev_base = bus_dev_ranges[i].dev_base; |
| 400 | dev_limit = bus_dev_ranges[i].dev_limit; |
| 401 | |
| 402 | for (slot = dev_base; slot < dev_limit; slot++) { |
| 403 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) |
| 404 | continue; |
| 405 | |
| 406 | iommu_detected = 1; |
| 407 | gart_iommu_aperture = 1; |
| 408 | x86_init.iommu.iommu_init = gart_iommu_init; |
| 409 | |
| 410 | ctl = read_pci_config(bus, slot, 3, |
| 411 | AMD64_GARTAPERTURECTL); |
| 412 | |
| 413 | /* |
| 414 | * Before we do anything else disable the GART. It may |
| 415 | * still be enabled if we boot into a crash-kernel here. |
| 416 | * Reconfiguring the GART while it is enabled could have |
| 417 | * unknown side-effects. |
| 418 | */ |
| 419 | ctl &= ~GARTEN; |
| 420 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); |
| 421 | |
| 422 | aper_order = (ctl >> 1) & 7; |
| 423 | aper_size = (32 * 1024 * 1024) << aper_order; |
| 424 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; |
| 425 | aper_base <<= 25; |
| 426 | |
| 427 | printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n", |
| 428 | node, aper_base, aper_size >> 20); |
| 429 | node++; |
| 430 | |
| 431 | if (!aperture_valid(aper_base, aper_size, 64<<20)) { |
| 432 | if (valid_agp && agp_aper_base && |
| 433 | agp_aper_base == aper_base && |
| 434 | agp_aper_order == aper_order) { |
| 435 | /* the same between two setting from NB and agp */ |
| 436 | if (!no_iommu && |
| 437 | max_pfn > MAX_DMA32_PFN && |
| 438 | !printed_gart_size_msg) { |
| 439 | printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n"); |
| 440 | printk(KERN_ERR "please increase GART size in your BIOS setup\n"); |
| 441 | printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n"); |
| 442 | printed_gart_size_msg = 1; |
| 443 | } |
| 444 | } else { |
| 445 | fix = 1; |
| 446 | goto out; |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | if ((last_aper_order && aper_order != last_aper_order) || |
| 451 | (last_aper_base && aper_base != last_aper_base)) { |
| 452 | fix = 1; |
| 453 | goto out; |
| 454 | } |
| 455 | last_aper_order = aper_order; |
| 456 | last_aper_base = aper_base; |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | out: |
| 461 | if (!fix && !fallback_aper_force) { |
| 462 | if (last_aper_base) { |
| 463 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; |
| 464 | |
| 465 | insert_aperture_resource((u32)last_aper_base, n); |
| 466 | } |
| 467 | return; |
| 468 | } |
| 469 | |
| 470 | if (!fallback_aper_force) { |
| 471 | aper_alloc = agp_aper_base; |
| 472 | aper_order = agp_aper_order; |
| 473 | } |
| 474 | |
| 475 | if (aper_alloc) { |
| 476 | /* Got the aperture from the AGP bridge */ |
| 477 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || |
| 478 | force_iommu || |
| 479 | valid_agp || |
| 480 | fallback_aper_force) { |
| 481 | printk(KERN_INFO |
| 482 | "Your BIOS doesn't leave a aperture memory hole\n"); |
| 483 | printk(KERN_INFO |
| 484 | "Please enable the IOMMU option in the BIOS setup\n"); |
| 485 | printk(KERN_INFO |
| 486 | "This costs you %d MB of RAM\n", |
| 487 | 32 << fallback_aper_order); |
| 488 | |
| 489 | aper_order = fallback_aper_order; |
| 490 | aper_alloc = allocate_aperture(); |
| 491 | if (!aper_alloc) { |
| 492 | /* |
| 493 | * Could disable AGP and IOMMU here, but it's |
| 494 | * probably not worth it. But the later users |
| 495 | * cannot deal with bad apertures and turning |
| 496 | * on the aperture over memory causes very |
| 497 | * strange problems, so it's better to panic |
| 498 | * early. |
| 499 | */ |
| 500 | panic("Not enough memory for aperture"); |
| 501 | } |
| 502 | } else { |
| 503 | return; |
| 504 | } |
| 505 | |
| 506 | /* Fix up the north bridges */ |
| 507 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
| 508 | int bus; |
| 509 | int dev_base, dev_limit; |
| 510 | |
| 511 | bus = bus_dev_ranges[i].bus; |
| 512 | dev_base = bus_dev_ranges[i].dev_base; |
| 513 | dev_limit = bus_dev_ranges[i].dev_limit; |
| 514 | for (slot = dev_base; slot < dev_limit; slot++) { |
| 515 | if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) |
| 516 | continue; |
| 517 | |
| 518 | /* Don't enable translation yet. That is done later. |
| 519 | Assume this BIOS didn't initialise the GART so |
| 520 | just overwrite all previous bits */ |
| 521 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1); |
| 522 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); |
| 523 | } |
| 524 | } |
| 525 | |
| 526 | set_up_gart_resume(aper_order, aper_alloc); |
| 527 | } |