drm/amdkfd: Fix saving the ACC vgprs for Aldebaran
author Laurent Morichetti <laurent.morichetti@amd.com>
Tue, 22 Dec 2020 19:42:46 +0000 (11:42 -0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 24 Mar 2021 02:56:55 +0000 (22:56 -0400)
get_num_acc_vgprs does not set status.scc if the number of acc vgprs
is 0, so use an s_and_b32 instruction (ANDing the count with itself) to
set the condition code before the following s_cbranch_scc0.

The Aldebaran handler binary was not based on the latest version of
the sources, so this update to the binary is a minimal change that only
adds the two instructions needed to set the condition code.

A newer version of the handler should be generated and tested in
another commit.

Signed-off-by: Laurent Morichetti <laurent.morichetti@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm

index c8453dad4ea55e706a5bad9521f55307d344decd..475f89700c74f502d1ad35411326c1e378e33565 100644 (file)
@@ -1576,7 +1576,7 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
 };
 
 static const uint32_t cwsr_trap_aldebaran_hex[] = {
-       0xbf820001, 0xbf8202cd,
+       0xbf820001, 0xbf8202ce,
        0xb8f8f802, 0x89788678,
        0xb8eef801, 0x866eff6e,
        0x00000800, 0xbf840003,
@@ -1873,114 +1873,115 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
        0xb8fb2985, 0x807b817b,
        0x8e7b837b, 0xb8fa2b05,
        0x807a817a, 0x8e7a827a,
-       0x80fb7a7b, 0xbf84007a,
-       0x807bff7b, 0x00001000,
-       0xbefc0080, 0xbf11017c,
-       0x867aff78, 0x00400000,
-       0xbf850003, 0xb8faf803,
-       0x897a7aff, 0x10000000,
-       0xbf850059, 0xd3d84000,
-       0x18000100, 0xd3d84001,
-       0x18000101, 0xd3d84002,
-       0x18000102, 0xd3d84003,
-       0x18000103, 0xbe840080,
-       0xd2890000, 0x00000900,
-       0x80048104, 0xd2890001,
-       0x00000900, 0x80048104,
-       0xd2890002, 0x00000900,
-       0x80048104, 0xd2890003,
-       0x00000900, 0x80048104,
-       0xc069003a, 0x00000070,
-       0xbf8cc07f, 0x80709070,
-       0xbf06c004, 0xbf84ffee,
+       0x80fb7a7b, 0x867b7b7b,
+       0xbf84007a, 0x807bff7b,
+       0x00001000, 0xbefc0080,
+       0xbf11017c, 0x867aff78,
+       0x00400000, 0xbf850003,
+       0xb8faf803, 0x897a7aff,
+       0x10000000, 0xbf850059,
+       0xd3d84000, 0x18000100,
+       0xd3d84001, 0x18000101,
+       0xd3d84002, 0x18000102,
+       0xd3d84003, 0x18000103,
        0xbe840080, 0xd2890000,
-       0x00000901, 0x80048104,
-       0xd2890001, 0x00000901,
+       0x00000900, 0x80048104,
+       0xd2890001, 0x00000900,
        0x80048104, 0xd2890002,
-       0x00000901, 0x80048104,
-       0xd2890003, 0x00000901,
+       0x00000900, 0x80048104,
+       0xd2890003, 0x00000900,
        0x80048104, 0xc069003a,
        0x00000070, 0xbf8cc07f,
        0x80709070, 0xbf06c004,
        0xbf84ffee, 0xbe840080,
-       0xd2890000, 0x00000902,
+       0xd2890000, 0x00000901,
        0x80048104, 0xd2890001,
-       0x00000902, 0x80048104,
-       0xd2890002, 0x00000902,
+       0x00000901, 0x80048104,
+       0xd2890002, 0x00000901,
        0x80048104, 0xd2890003,
-       0x00000902, 0x80048104,
+       0x00000901, 0x80048104,
        0xc069003a, 0x00000070,
        0xbf8cc07f, 0x80709070,
        0xbf06c004, 0xbf84ffee,
        0xbe840080, 0xd2890000,
-       0x00000903, 0x80048104,
-       0xd2890001, 0x00000903,
+       0x00000902, 0x80048104,
+       0xd2890001, 0x00000902,
        0x80048104, 0xd2890002,
-       0x00000903, 0x80048104,
-       0xd2890003, 0x00000903,
+       0x00000902, 0x80048104,
+       0xd2890003, 0x00000902,
        0x80048104, 0xc069003a,
        0x00000070, 0xbf8cc07f,
        0x80709070, 0xbf06c004,
-       0xbf84ffee, 0x807c847c,
-       0xbf0a7b7c, 0xbf85ffa9,
-       0xbf9c0000, 0xbf820016,
-       0xd3d84000, 0x18000100,
-       0xd3d84001, 0x18000101,
-       0xd3d84002, 0x18000102,
-       0xd3d84003, 0x18000103,
-       0xe0724000, 0x701d0000,
-       0xe0724100, 0x701d0100,
-       0xe0724200, 0x701d0200,
-       0xe0724300, 0x701d0300,
-       0x807c847c, 0x8070ff70,
-       0x00000400, 0xbf0a7b7c,
-       0xbf85ffeb, 0xbf9c0000,
-       0xbf820100, 0xbef4007e,
-       0x8675ff7f, 0x0000ffff,
-       0x8775ff75, 0x00040000,
-       0xbef60080, 0xbef700ff,
-       0x00807fac, 0x866eff7f,
-       0x08000000, 0x8f6e836e,
-       0x87776e77, 0x866eff7f,
-       0x70000000, 0x8f6e816e,
-       0x87776e77, 0x866eff7f,
-       0x04000000, 0xbf84001f,
+       0xbf84ffee, 0xbe840080,
+       0xd2890000, 0x00000903,
+       0x80048104, 0xd2890001,
+       0x00000903, 0x80048104,
+       0xd2890002, 0x00000903,
+       0x80048104, 0xd2890003,
+       0x00000903, 0x80048104,
+       0xc069003a, 0x00000070,
+       0xbf8cc07f, 0x80709070,
+       0xbf06c004, 0xbf84ffee,
+       0x807c847c, 0xbf0a7b7c,
+       0xbf85ffa9, 0xbf9c0000,
+       0xbf820016, 0xd3d84000,
+       0x18000100, 0xd3d84001,
+       0x18000101, 0xd3d84002,
+       0x18000102, 0xd3d84003,
+       0x18000103, 0xe0724000,
+       0x701d0000, 0xe0724100,
+       0x701d0100, 0xe0724200,
+       0x701d0200, 0xe0724300,
+       0x701d0300, 0x807c847c,
+       0x8070ff70, 0x00000400,
+       0xbf0a7b7c, 0xbf85ffeb,
+       0xbf9c0000, 0xbf820101,
+       0xbef4007e, 0x8675ff7f,
+       0x0000ffff, 0x8775ff75,
+       0x00040000, 0xbef60080,
+       0xbef700ff, 0x00807fac,
+       0x866eff7f, 0x08000000,
+       0x8f6e836e, 0x87776e77,
+       0x866eff7f, 0x70000000,
+       0x8f6e816e, 0x87776e77,
+       0x866eff7f, 0x04000000,
+       0xbf84001f, 0xbefe00c1,
+       0xbeff00c1, 0xb8ef4306,
+       0x866fc16f, 0xbf84001a,
+       0x8e6f866f, 0x8e6f826f,
+       0xbef6006f, 0xb8f82985,
+       0x80788178, 0x8e788a78,
+       0x8e788178, 0xb8ee1605,
+       0x806e816e, 0x8e6e866e,
+       0x80786e78, 0x8078ff78,
+       0x00000080, 0xbef600ff,
+       0x01000000, 0xbefc0080,
+       0xe0510000, 0x781d0000,
+       0xe0510100, 0x781d0000,
+       0x807cff7c, 0x00000200,
+       0x8078ff78, 0x00000200,
+       0xbf0a6f7c, 0xbf85fff6,
        0xbefe00c1, 0xbeff00c1,
-       0xb8ef4306, 0x866fc16f,
-       0xbf84001a, 0x8e6f866f,
-       0x8e6f826f, 0xbef6006f,
-       0xb8f82985, 0x80788178,
-       0x8e788a78, 0x8e788178,
-       0xb8ee1605, 0x806e816e,
-       0x8e6e866e, 0x80786e78,
-       0x8078ff78, 0x00000080,
        0xbef600ff, 0x01000000,
-       0xbefc0080, 0xe0510000,
-       0x781d0000, 0xe0510100,
-       0x781d0000, 0x807cff7c,
-       0x00000200, 0x8078ff78,
-       0x00000200, 0xbf0a6f7c,
-       0xbf85fff6, 0xbefe00c1,
-       0xbeff00c1, 0xbef600ff,
-       0x01000000, 0xb8ef2b05,
-       0x806f816f, 0x8e6f826f,
-       0x806fff6f, 0x00008000,
-       0xbef80080, 0xbeee0078,
-       0x8078ff78, 0x00000400,
-       0xbefc0084, 0xbf11087c,
-       0xe0524000, 0x781d0000,
-       0xe0524100, 0x781d0100,
-       0xe0524200, 0x781d0200,
-       0xe0524300, 0x781d0300,
-       0xbf8c0f70, 0x7e000300,
-       0x7e020301, 0x7e040302,
-       0x7e060303, 0x807c847c,
-       0x8078ff78, 0x00000400,
-       0xbf0a6f7c, 0xbf85ffee,
-       0xb8ef2985, 0x806f816f,
-       0x8e6f836f, 0xb8f92b05,
-       0x80798179, 0x8e798279,
-       0x80ef796f, 0xbf84001a,
+       0xb8ef2b05, 0x806f816f,
+       0x8e6f826f, 0x806fff6f,
+       0x00008000, 0xbef80080,
+       0xbeee0078, 0x8078ff78,
+       0x00000400, 0xbefc0084,
+       0xbf11087c, 0xe0524000,
+       0x781d0000, 0xe0524100,
+       0x781d0100, 0xe0524200,
+       0x781d0200, 0xe0524300,
+       0x781d0300, 0xbf8c0f70,
+       0x7e000300, 0x7e020301,
+       0x7e040302, 0x7e060303,
+       0x807c847c, 0x8078ff78,
+       0x00000400, 0xbf0a6f7c,
+       0xbf85ffee, 0xb8ef2985,
+       0x806f816f, 0x8e6f836f,
+       0xb8f92b05, 0x80798179,
+       0x8e798279, 0x80ef796f,
+       0x866f6f6f, 0xbf84001a,
        0x806fff6f, 0x00008000,
        0xbefc0080, 0xbf11087c,
        0xe0524000, 0x781d0000,
index 4b3f0762a779bc327b11323e1c5f288260518cd0..eed78a04e7c77b4714e652cd71a6cbc7ce5ea9a2 100644 (file)
@@ -632,6 +632,7 @@ L_SAVE_VGPR_END:
 #if ASIC_FAMILY >= CHIP_ALDEBARAN
     // ACC VGPR count may differ from ARCH VGPR count.
     get_num_acc_vgprs(s_save_alloc_size, s_save_tmp)
+    s_and_b32       s_save_alloc_size, s_save_alloc_size, s_save_alloc_size
     s_cbranch_scc0  L_SAVE_ACCVGPR_END
     s_add_u32      s_save_alloc_size, s_save_alloc_size, 0x1000                    //add 0x1000 since we compare m0 against it later
 #endif
@@ -769,6 +770,7 @@ L_RESTORE:
 #if ASIC_FAMILY >= CHIP_ALDEBARAN
     // ACC VGPR count may differ from ARCH VGPR count.
     get_num_acc_vgprs(s_restore_alloc_size, s_restore_tmp2)
+    s_and_b32       s_restore_alloc_size, s_restore_alloc_size, s_restore_alloc_size
     s_cbranch_scc0  L_RESTORE_ACCVGPR_END
     s_add_u32      s_restore_alloc_size, s_restore_alloc_size, 0x8000                      //add 0x8000 since we compare m0 against it later
 #endif