drm/nouveau/fb/gf100-: rework ram detection
author Ben Skeggs <bskeggs@redhat.com>
Thu, 2 Mar 2017 03:53:05 +0000 (13:53 +1000)
committer Ben Skeggs <bskeggs@redhat.com>
Tue, 7 Mar 2017 07:05:17 +0000 (17:05 +1000)
This commit reworks the RAM detection algorithm, using RAM-per-LTC to
determine whether a board has a mixed-memory configuration instead of
using RAM-per-FBPA.  I'm not certain the algorithm is perfect, but it
should handle all currently known configurations at the very least.

This should fix GTX 970 boards with 4GiB of RAM, where the last 512MiB
isn't fully accessible, as well as GF108 boards, where only half of
the VRAM was being detected.

As a nice side-effect, GP10x memory detection now reuses the majority
of the code from earlier chipsets.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c
drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c

index 6876b269a447fb206e5056d29906856ca7f55ce5..891497a0fe3be2812be17e0787bb267a72ddacf7 100644 (file)
@@ -147,6 +147,12 @@ struct nvkm_ram {
 };
 
 struct nvkm_ram_func {
+       u64 upper;
+       u32 (*probe_fbp)(const struct nvkm_ram_func *, struct nvkm_device *,
+                        int fbp, int *pltcs);
+       u32 (*probe_fbp_amount)(const struct nvkm_ram_func *, u32 fbpao,
+                               struct nvkm_device *, int fbp, int *pltcs);
+       u32 (*probe_fbpa_amount)(struct nvkm_device *, int fbpa);
        void *(*dtor)(struct nvkm_ram *);
        int (*init)(struct nvkm_ram *);
 
index 893fbb95718144c7dfc9b40c080f64bb5a8ae5b4..fac7e73c3ddfb8d85c68b8ec9adecae682ff215a 100644 (file)
@@ -22,7 +22,12 @@ void __nv50_ram_put(struct nvkm_ram *, struct nvkm_mem *);
 int gf100_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *,
                   struct nvkm_ram **);
 int  gf100_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *,
-                   u32, struct nvkm_ram *);
+                   struct nvkm_ram *);
+u32  gf100_ram_probe_fbp(const struct nvkm_ram_func *,
+                        struct nvkm_device *, int, int *);
+u32  gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
+                               struct nvkm_device *, int, int *);
+u32  gf100_ram_probe_fbpa_amount(struct nvkm_device *, int);
 int  gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **);
 void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **);
 int gf100_ram_init(struct nvkm_ram *);
@@ -30,14 +35,23 @@ int gf100_ram_calc(struct nvkm_ram *, u32);
 int gf100_ram_prog(struct nvkm_ram *);
 void gf100_ram_tidy(struct nvkm_ram *);
 
+u32 gf108_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
+                              struct nvkm_device *, int, int *);
+
 int gk104_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *,
-                  struct nvkm_ram **, u32);
+                  struct nvkm_ram **);
 void *gk104_ram_dtor(struct nvkm_ram *);
 int gk104_ram_init(struct nvkm_ram *);
 int gk104_ram_calc(struct nvkm_ram *, u32);
 int gk104_ram_prog(struct nvkm_ram *);
 void gk104_ram_tidy(struct nvkm_ram *);
 
+u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *,
+                       struct nvkm_device *, int, int *);
+
+u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32,
+                              struct nvkm_device *, int, int *);
+
 /* RAM type-specific MR calculation routines */
 int nvkm_sddr2_calc(struct nvkm_ram *);
 int nvkm_sddr3_calc(struct nvkm_ram *);
index ef28514af56f87457dbfa6a5802e1dc7daa6e352..53c32fc694e949b1e1d3bb461704f2beb3366827 100644 (file)
@@ -543,67 +543,96 @@ gf100_ram_init(struct nvkm_ram *base)
        return 0;
 }
 
+u32
+gf100_ram_probe_fbpa_amount(struct nvkm_device *device, int fbpa)
+{
+       return nvkm_rd32(device, 0x11020c + (fbpa * 0x1000));
+}
+
+u32
+gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao,
+                          struct nvkm_device *device, int fbp, int *pltcs)
+{
+       if (!(fbpao & BIT(fbp))) {
+               *pltcs = 1;
+               return func->probe_fbpa_amount(device, fbp);
+       }
+       return 0;
+}
+
+u32
+gf100_ram_probe_fbp(const struct nvkm_ram_func *func,
+                   struct nvkm_device *device, int fbp, int *pltcs)
+{
+       u32 fbpao = nvkm_rd32(device, 0x022554);
+       return func->probe_fbp_amount(func, fbpao, device, fbp, pltcs);
+}
+
 int
 gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
-              u32 maskaddr, struct nvkm_ram *ram)
+              struct nvkm_ram *ram)
 {
        struct nvkm_subdev *subdev = &fb->subdev;
        struct nvkm_device *device = subdev->device;
        struct nvkm_bios *bios = device->bios;
        const u32 rsvd_head = ( 256 * 1024); /* vga memory */
        const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
-       u32 parts = nvkm_rd32(device, 0x022438);
-       u32 pmask = nvkm_rd32(device, maskaddr);
-       u64 bsize = (u64)nvkm_rd32(device, 0x10f20c) << 20;
-       u64 psize, size = 0;
        enum nvkm_ram_type type = nvkm_fb_bios_memtype(bios);
-       bool uniform = true;
-       int ret, i;
-
-       nvkm_debug(subdev, "100800: %08x\n", nvkm_rd32(device, 0x100800));
-       nvkm_debug(subdev, "parts %08x mask %08x\n", parts, pmask);
-
-       /* read amount of vram attached to each memory controller */
-       for (i = 0; i < parts; i++) {
-               if (pmask & (1 << i))
-                       continue;
-
-               psize = (u64)nvkm_rd32(device, 0x11020c + (i * 0x1000)) << 20;
-               if (psize != bsize) {
-                       if (psize < bsize)
-                               bsize = psize;
-                       uniform = false;
+       u32 fbps = nvkm_rd32(device, 0x022438);
+       u64 total = 0, lcomm = ~0, lower, ubase, usize;
+       int ret, fbp, ltcs, ltcn = 0;
+
+       nvkm_debug(subdev, "%d FBP(s)\n", fbps);
+       for (fbp = 0; fbp < fbps; fbp++) {
+               u32 size = func->probe_fbp(func, device, fbp, &ltcs);
+               if (size) {
+                       nvkm_debug(subdev, "FBP %d: %4d MiB, %d LTC(s)\n",
+                                  fbp, size, ltcs);
+                       lcomm  = min(lcomm, (u64)(size / ltcs) << 20);
+                       total += size << 20;
+                       ltcn  += ltcs;
+               } else {
+                       nvkm_debug(subdev, "FBP %d: disabled\n", fbp);
                }
-
-               nvkm_debug(subdev, "%d: %d MiB\n", i, (u32)(psize >> 20));
-               size += psize;
        }
 
-       ret = nvkm_ram_ctor(func, fb, type, size, 0, ram);
+       lower = lcomm * ltcn;
+       ubase = lcomm + func->upper;
+       usize = total - lower;
+
+       nvkm_debug(subdev, "Lower: %4lld MiB @ %010llx\n", lower >> 20, 0ULL);
+       nvkm_debug(subdev, "Upper: %4lld MiB @ %010llx\n", usize >> 20, ubase);
+       nvkm_debug(subdev, "Total: %4lld MiB\n", total >> 20);
+
+       ret = nvkm_ram_ctor(func, fb, type, total, 0, ram);
        if (ret)
                return ret;
 
        nvkm_mm_fini(&ram->vram);
 
-       /* if all controllers have the same amount attached, there's no holes */
-       if (uniform) {
+       /* Some GPUs are in what's known as a "mixed memory" configuration.
+        *
+        * This is either where some FBPs have more memory than the others,
+        * or where LTCs have been disabled on a FBP.
+        */
+       if (lower != total) {
+               /* The common memory amount is addressed normally. */
                ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
-                                  (size - rsvd_head - rsvd_tail) >>
-                                  NVKM_RAM_MM_SHIFT, 1);
+                                  (lower - rsvd_head) >> NVKM_RAM_MM_SHIFT, 1);
                if (ret)
                        return ret;
-       } else {
-               /* otherwise, address lowest common amount from 0GiB */
-               ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
-                                  ((bsize * parts) - rsvd_head) >>
-                                  NVKM_RAM_MM_SHIFT, 1);
+
+               /* And the rest is much higher in the physical address
+                * space, and may not be usable for certain operations.
+                */
+               ret = nvkm_mm_init(&ram->vram, ubase >> NVKM_RAM_MM_SHIFT,
+                                  (usize - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1);
                if (ret)
                        return ret;
-
-               /* and the rest starting from (8GiB + common_size) */
-               ret = nvkm_mm_init(&ram->vram, (0x0200000000ULL + bsize) >>
-                                  NVKM_RAM_MM_SHIFT,
-                                  (size - (bsize * parts) - rsvd_tail) >>
+       } else {
+               /* GPUs without mixed-memory are a lot nicer... */
+               ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+                                  (total - rsvd_head - rsvd_tail) >>
                                   NVKM_RAM_MM_SHIFT, 1);
                if (ret)
                        return ret;
@@ -626,7 +655,7 @@ gf100_ram_new_(const struct nvkm_ram_func *func,
                return -ENOMEM;
        *pram = &ram->base;
 
-       ret = gf100_ram_ctor(func, fb, 0x022554, &ram->base);
+       ret = gf100_ram_ctor(func, fb, &ram->base);
        if (ret)
                return ret;
 
@@ -705,6 +734,10 @@ gf100_ram_new_(const struct nvkm_ram_func *func,
 
 static const struct nvkm_ram_func
 gf100_ram = {
+       .upper = 0x0200000000,
+       .probe_fbp = gf100_ram_probe_fbp,
+       .probe_fbp_amount = gf100_ram_probe_fbp_amount,
+       .probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
        .init = gf100_ram_init,
        .get = gf100_ram_get,
        .put = gf100_ram_put,
index ddab0db3a7a5303718161dc38112406cd751a3d4..985ec64cf369bc7258a9d1e310ddba3b78e3e490 100644 (file)
  */
 #include "ram.h"
 
+u32
+gf108_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao,
+                          struct nvkm_device *device, int fbp, int *pltcs)
+{
+       u32 fbpt  = nvkm_rd32(device, 0x022438);
+       u32 fbpat = nvkm_rd32(device, 0x02243c);
+       u32 fbpas = fbpat / fbpt;
+       u32 fbpa  = fbp * fbpas;
+       u32 size  = 0;
+       while (fbpas--) {
+               if (!(fbpao & BIT(fbpa)))
+                       size += func->probe_fbpa_amount(device, fbpa);
+               fbpa++;
+       }
+       *pltcs = 1;
+       return size;
+}
+
 static const struct nvkm_ram_func
 gf108_ram = {
+       .upper = 0x0200000000,
+       .probe_fbp = gf100_ram_probe_fbp,
+       .probe_fbp_amount = gf108_ram_probe_fbp_amount,
+       .probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
        .init = gf100_ram_init,
        .get = gf100_ram_get,
        .put = gf100_ram_put,
index 2575d6c370a87f7e992f0087415624af34e95816..f6c00791722c7b53b94561056b5dcf752c3d233c 100644 (file)
@@ -1524,7 +1524,7 @@ gk104_ram_dtor(struct nvkm_ram *base)
 
 int
 gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
-              struct nvkm_ram **pram, u32 maskaddr)
+              struct nvkm_ram **pram)
 {
        struct nvkm_subdev *subdev = &fb->subdev;
        struct nvkm_device *device = subdev->device;
@@ -1539,7 +1539,7 @@ gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
                return -ENOMEM;
        *pram = &ram->base;
 
-       ret = gf100_ram_ctor(func, fb, maskaddr, &ram->base);
+       ret = gf100_ram_ctor(func, fb, &ram->base);
        if (ret)
                return ret;
 
@@ -1703,6 +1703,10 @@ gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
 
 static const struct nvkm_ram_func
 gk104_ram = {
+       .upper = 0x0200000000,
+       .probe_fbp = gf100_ram_probe_fbp,
+       .probe_fbp_amount = gf108_ram_probe_fbp_amount,
+       .probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
        .dtor = gk104_ram_dtor,
        .init = gk104_ram_init,
        .get = gf100_ram_get,
@@ -1715,5 +1719,5 @@ gk104_ram = {
 int
 gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
-       return gk104_ram_new_(&gk104_ram, fb, pram, 0x022554);
+       return gk104_ram_new_(&gk104_ram, fb, pram);
 }
index af5a97a9061ec45548fe807e7a195e83a46b8c6b..3f0b56347291bf433327c4541e2f443d5dc3cb8c 100644 (file)
  */
 #include "ram.h"
 
+u32
+gm107_ram_probe_fbp(const struct nvkm_ram_func *func,
+                   struct nvkm_device *device, int fbp, int *pltcs)
+{
+       u32 fbpao = nvkm_rd32(device, 0x021c14);
+       return func->probe_fbp_amount(func, fbpao, device, fbp, pltcs);
+}
+
 static const struct nvkm_ram_func
 gm107_ram = {
+       .upper = 0x1000000000,
+       .probe_fbp = gm107_ram_probe_fbp,
+       .probe_fbp_amount = gf108_ram_probe_fbp_amount,
+       .probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
        .dtor = gk104_ram_dtor,
        .init = gk104_ram_init,
        .get = gf100_ram_get,
@@ -37,5 +49,5 @@ gm107_ram = {
 int
 gm107_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
-       return gk104_ram_new_(&gm107_ram, fb, pram, 0x021c14);
+       return gk104_ram_new_(&gm107_ram, fb, pram);
 }
index 04ae81f0db1033da3b2690695b1c26edf9d3a2b0..fd8facf904766bf30eb4eb07dac4278c44a5f192 100644 (file)
  */
 #include "ram.h"
 
+u32
+gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao,
+                          struct nvkm_device *device, int fbp, int *pltcs)
+{
+       u32 ltcs  = nvkm_rd32(device, 0x022450);
+       u32 fbpas = nvkm_rd32(device, 0x022458);
+       u32 fbpa  = fbp * fbpas;
+       u32 size  = 0;
+       if (!(nvkm_rd32(device, 0x021d38) & BIT(fbp))) {
+               u32 ltco = nvkm_rd32(device, 0x021d70 + (fbp * 4));
+               u32 ltcm = ~ltco & ((1 << ltcs) - 1);
+
+               while (fbpas--) {
+                       if (!(fbpao & (1 << fbpa)))
+                               size += func->probe_fbpa_amount(device, fbpa);
+                       fbpa++;
+               }
+
+               *pltcs = hweight32(ltcm);
+       }
+       return size;
+}
+
 static const struct nvkm_ram_func
 gm200_ram = {
+       .upper = 0x1000000000,
+       .probe_fbp = gm107_ram_probe_fbp,
+       .probe_fbp_amount = gm200_ram_probe_fbp_amount,
+       .probe_fbpa_amount = gf100_ram_probe_fbpa_amount,
        .dtor = gk104_ram_dtor,
        .init = gk104_ram_init,
        .get = gf100_ram_get,
@@ -37,5 +64,5 @@ gm200_ram = {
 int
 gm200_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
-       return gk104_ram_new_(&gm200_ram, fb, pram, 0x021c14);
+       return gk104_ram_new_(&gm200_ram, fb, pram);
 }
index 405faabe8dcd63ffb5a98017d41a9f98fba92b15..cac70047ad5a56e7a7a59a301efa49110340aea8 100644 (file)
@@ -76,8 +76,18 @@ gp100_ram_init(struct nvkm_ram *ram)
        return 0;
 }
 
+static u32
+gp100_ram_probe_fbpa(struct nvkm_device *device, int fbpa)
+{
+       return nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
+}
+
 static const struct nvkm_ram_func
-gp100_ram_func = {
+gp100_ram = {
+       .upper = 0x1000000000,
+       .probe_fbp = gm107_ram_probe_fbp,
+       .probe_fbp_amount = gm200_ram_probe_fbp_amount,
+       .probe_fbpa_amount = gp100_ram_probe_fbpa,
        .init = gp100_ram_init,
        .get = gf100_ram_get,
        .put = gf100_ram_put,
@@ -87,60 +97,10 @@ int
 gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
        struct nvkm_ram *ram;
-       struct nvkm_subdev *subdev = &fb->subdev;
-       struct nvkm_device *device = subdev->device;
-       enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
-       const u32 rsvd_head = ( 256 * 1024); /* vga memory */
-       const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
-       u32 fbpa_num = nvkm_rd32(device, 0x02243c), fbpa;
-       u32 fbio_opt = nvkm_rd32(device, 0x021c14);
-       u64 part, size = 0, comm = ~0ULL;
-       bool mixed = false;
-       int ret;
-
-       nvkm_debug(subdev, "02243c: %08x\n", fbpa_num);
-       nvkm_debug(subdev, "021c14: %08x\n", fbio_opt);
-       for (fbpa = 0; fbpa < fbpa_num; fbpa++) {
-               if (!(fbio_opt & (1 << fbpa))) {
-                       part = nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
-                       nvkm_debug(subdev, "fbpa %02x: %lld MiB\n", fbpa, part);
-                       part = part << 20;
-                       if (part != comm) {
-                               if (comm != ~0ULL)
-                                       mixed = true;
-                               comm = min(comm, part);
-                       }
-                       size = size + part;
-               }
-       }
-
-       ret = nvkm_ram_new_(&gp100_ram_func, fb, type, size, 0, &ram);
-       *pram = ram;
-       if (ret)
-               return ret;
 
-       nvkm_mm_fini(&ram->vram);
+       if (!(ram = *pram = kzalloc(sizeof(*ram), GFP_KERNEL)))
+               return -ENOMEM;
 
-       if (mixed) {
-               ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
-                                  ((comm * fbpa_num) - rsvd_head) >>
-                                  NVKM_RAM_MM_SHIFT, 1);
-               if (ret)
-                       return ret;
+       return gf100_ram_ctor(&gp100_ram, fb, ram);
 
-               ret = nvkm_mm_init(&ram->vram, (0x1000000000ULL + comm) >>
-                                  NVKM_RAM_MM_SHIFT,
-                                  (size - (comm * fbpa_num) - rsvd_tail) >>
-                                  NVKM_RAM_MM_SHIFT, 1);
-               if (ret)
-                       return ret;
-       } else {
-               ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
-                                  (size - rsvd_head - rsvd_tail) >>
-                                  NVKM_RAM_MM_SHIFT, 1);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
 }