Commit | Line | Data |
---|---|---|
4a488a7a OG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
4a488a7a OG |
23 | #include <linux/bsearch.h> |
24 | #include <linux/pci.h> | |
25 | #include <linux/slab.h> | |
26 | #include "kfd_priv.h" | |
64c7f8cf | 27 | #include "kfd_device_queue_manager.h" |
507968dd | 28 | #include "kfd_pm4_headers_vi.h" |
0db54b24 | 29 | #include "cwsr_trap_handler.h" |
64d1c3a4 | 30 | #include "kfd_iommu.h" |
5b87245f | 31 | #include "amdgpu_amdkfd.h" |
2c2b0d88 | 32 | #include "kfd_smi_events.h" |
4a488a7a | 33 | |
19f6d2a6 | 34 | #define MQD_SIZE_ALIGNED 768 |
e42051d2 SL |
35 | |
/*
 * kfd_locked is used to lock the kfd driver during suspend or reset.
 * Once locked, the kfd driver will stop any further GPU execution and
 * create process (open) will return -EAGAIN.
 */
41 | static atomic_t kfd_locked = ATOMIC_INIT(0); | |
19f6d2a6 | 42 | |
a3e520a2 | 43 | #ifdef CONFIG_DRM_AMDGPU_CIK |
e392c887 | 44 | extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; |
a3e520a2 | 45 | #endif |
e392c887 YZ |
46 | extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; |
47 | extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; | |
48 | extern const struct kfd2kgd_calls arcturus_kfd2kgd; | |
5073506c | 49 | extern const struct kfd2kgd_calls aldebaran_kfd2kgd; |
e392c887 | 50 | extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; |
3a2f0c81 | 51 | extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; |
e392c887 YZ |
52 | |
53 | static const struct kfd2kgd_calls *kfd2kgd_funcs[] = { | |
54 | #ifdef KFD_SUPPORT_IOMMU_V2 | |
a3e520a2 | 55 | #ifdef CONFIG_DRM_AMDGPU_CIK |
e392c887 | 56 | [CHIP_KAVERI] = &gfx_v7_kfd2kgd, |
a3e520a2 | 57 | #endif |
e392c887 YZ |
58 | [CHIP_CARRIZO] = &gfx_v8_kfd2kgd, |
59 | [CHIP_RAVEN] = &gfx_v9_kfd2kgd, | |
60 | #endif | |
a3e520a2 | 61 | #ifdef CONFIG_DRM_AMDGPU_CIK |
e392c887 | 62 | [CHIP_HAWAII] = &gfx_v7_kfd2kgd, |
a3e520a2 | 63 | #endif |
e392c887 YZ |
64 | [CHIP_TONGA] = &gfx_v8_kfd2kgd, |
65 | [CHIP_FIJI] = &gfx_v8_kfd2kgd, | |
66 | [CHIP_POLARIS10] = &gfx_v8_kfd2kgd, | |
67 | [CHIP_POLARIS11] = &gfx_v8_kfd2kgd, | |
68 | [CHIP_POLARIS12] = &gfx_v8_kfd2kgd, | |
69 | [CHIP_VEGAM] = &gfx_v8_kfd2kgd, | |
70 | [CHIP_VEGA10] = &gfx_v9_kfd2kgd, | |
71 | [CHIP_VEGA12] = &gfx_v9_kfd2kgd, | |
72 | [CHIP_VEGA20] = &gfx_v9_kfd2kgd, | |
73 | [CHIP_RENOIR] = &gfx_v9_kfd2kgd, | |
74 | [CHIP_ARCTURUS] = &arcturus_kfd2kgd, | |
5073506c | 75 | [CHIP_ALDEBARAN] = &aldebaran_kfd2kgd, |
e392c887 YZ |
76 | [CHIP_NAVI10] = &gfx_v10_kfd2kgd, |
77 | [CHIP_NAVI12] = &gfx_v10_kfd2kgd, | |
78 | [CHIP_NAVI14] = &gfx_v10_kfd2kgd, | |
3a2f0c81 | 79 | [CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd, |
09759e13 | 80 | [CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd, |
3a5e715d | 81 | [CHIP_VANGOGH] = &gfx_v10_3_kfd2kgd, |
8f72ce64 | 82 | [CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd, |
e392c887 YZ |
83 | }; |
84 | ||
64d1c3a4 | 85 | #ifdef KFD_SUPPORT_IOMMU_V2 |
4a488a7a | 86 | static const struct kfd_device_info kaveri_device_info = { |
0da7558c | 87 | .asic_family = CHIP_KAVERI, |
c181159a | 88 | .asic_name = "kaveri", |
0da7558c | 89 | .max_pasid_bits = 16, |
992839ad YS |
90 | /* max num of queues for KV.TODO should be a dynamic value */ |
91 | .max_no_of_hqd = 24, | |
ada2b29c | 92 | .doorbell_size = 4, |
0da7558c | 93 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
f3a39818 | 94 | .event_interrupt_class = &event_interrupt_class_cik, |
fbeb661b | 95 | .num_of_watch_points = 4, |
373d7080 FK |
96 | .mqd_size_aligned = MQD_SIZE_ALIGNED, |
97 | .supports_cwsr = false, | |
64d1c3a4 | 98 | .needs_iommu_device = true, |
3ee2d00c | 99 | .needs_pci_atomics = false, |
98bb9222 | 100 | .num_sdma_engines = 2, |
1b4670f6 | 101 | .num_xgmi_sdma_engines = 0, |
d5094189 | 102 | .num_sdma_queues_per_engine = 2, |
0da7558c BG |
103 | }; |
104 | ||
105 | static const struct kfd_device_info carrizo_device_info = { | |
106 | .asic_family = CHIP_CARRIZO, | |
c181159a | 107 | .asic_name = "carrizo", |
4a488a7a | 108 | .max_pasid_bits = 16, |
eaccd6e7 OG |
109 | /* max num of queues for CZ.TODO should be a dynamic value */ |
110 | .max_no_of_hqd = 24, | |
ada2b29c | 111 | .doorbell_size = 4, |
b3f5e6b4 | 112 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
eaccd6e7 | 113 | .event_interrupt_class = &event_interrupt_class_cik, |
f7c826ad | 114 | .num_of_watch_points = 4, |
373d7080 FK |
115 | .mqd_size_aligned = MQD_SIZE_ALIGNED, |
116 | .supports_cwsr = true, | |
64d1c3a4 | 117 | .needs_iommu_device = true, |
3ee2d00c | 118 | .needs_pci_atomics = false, |
98bb9222 | 119 | .num_sdma_engines = 2, |
1b4670f6 | 120 | .num_xgmi_sdma_engines = 0, |
d5094189 | 121 | .num_sdma_queues_per_engine = 2, |
4a488a7a | 122 | }; |
6127896f | 123 | #endif |
4d663df6 YZ |
124 | |
125 | static const struct kfd_device_info raven_device_info = { | |
126 | .asic_family = CHIP_RAVEN, | |
c181159a | 127 | .asic_name = "raven", |
4d663df6 YZ |
128 | .max_pasid_bits = 16, |
129 | .max_no_of_hqd = 24, | |
130 | .doorbell_size = 8, | |
131 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
132 | .event_interrupt_class = &event_interrupt_class_v9, | |
133 | .num_of_watch_points = 4, | |
134 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
135 | .supports_cwsr = true, | |
136 | .needs_iommu_device = true, | |
137 | .needs_pci_atomics = true, | |
138 | .num_sdma_engines = 1, | |
1b4670f6 | 139 | .num_xgmi_sdma_engines = 0, |
d5094189 | 140 | .num_sdma_queues_per_engine = 2, |
4d663df6 | 141 | }; |
4a488a7a | 142 | |
a3084e6c FK |
143 | static const struct kfd_device_info hawaii_device_info = { |
144 | .asic_family = CHIP_HAWAII, | |
c181159a | 145 | .asic_name = "hawaii", |
a3084e6c FK |
146 | .max_pasid_bits = 16, |
147 | /* max num of queues for KV.TODO should be a dynamic value */ | |
148 | .max_no_of_hqd = 24, | |
ada2b29c | 149 | .doorbell_size = 4, |
a3084e6c FK |
150 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
151 | .event_interrupt_class = &event_interrupt_class_cik, | |
152 | .num_of_watch_points = 4, | |
153 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
154 | .supports_cwsr = false, | |
64d1c3a4 | 155 | .needs_iommu_device = false, |
a3084e6c | 156 | .needs_pci_atomics = false, |
98bb9222 | 157 | .num_sdma_engines = 2, |
1b4670f6 | 158 | .num_xgmi_sdma_engines = 0, |
d5094189 | 159 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
160 | }; |
161 | ||
162 | static const struct kfd_device_info tonga_device_info = { | |
163 | .asic_family = CHIP_TONGA, | |
c181159a | 164 | .asic_name = "tonga", |
a3084e6c FK |
165 | .max_pasid_bits = 16, |
166 | .max_no_of_hqd = 24, | |
ada2b29c | 167 | .doorbell_size = 4, |
a3084e6c FK |
168 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
169 | .event_interrupt_class = &event_interrupt_class_cik, | |
170 | .num_of_watch_points = 4, | |
171 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
172 | .supports_cwsr = false, | |
64d1c3a4 | 173 | .needs_iommu_device = false, |
a3084e6c | 174 | .needs_pci_atomics = true, |
98bb9222 | 175 | .num_sdma_engines = 2, |
1b4670f6 | 176 | .num_xgmi_sdma_engines = 0, |
d5094189 | 177 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
178 | }; |
179 | ||
a3084e6c FK |
180 | static const struct kfd_device_info fiji_device_info = { |
181 | .asic_family = CHIP_FIJI, | |
c181159a | 182 | .asic_name = "fiji", |
a3084e6c FK |
183 | .max_pasid_bits = 16, |
184 | .max_no_of_hqd = 24, | |
ada2b29c | 185 | .doorbell_size = 4, |
a3084e6c FK |
186 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
187 | .event_interrupt_class = &event_interrupt_class_cik, | |
188 | .num_of_watch_points = 4, | |
189 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
190 | .supports_cwsr = true, | |
64d1c3a4 | 191 | .needs_iommu_device = false, |
a3084e6c | 192 | .needs_pci_atomics = true, |
98bb9222 | 193 | .num_sdma_engines = 2, |
1b4670f6 | 194 | .num_xgmi_sdma_engines = 0, |
d5094189 | 195 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
196 | }; |
197 | ||
198 | static const struct kfd_device_info fiji_vf_device_info = { | |
199 | .asic_family = CHIP_FIJI, | |
c181159a | 200 | .asic_name = "fiji", |
a3084e6c FK |
201 | .max_pasid_bits = 16, |
202 | .max_no_of_hqd = 24, | |
ada2b29c | 203 | .doorbell_size = 4, |
a3084e6c FK |
204 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
205 | .event_interrupt_class = &event_interrupt_class_cik, | |
206 | .num_of_watch_points = 4, | |
207 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
208 | .supports_cwsr = true, | |
64d1c3a4 | 209 | .needs_iommu_device = false, |
a3084e6c | 210 | .needs_pci_atomics = false, |
98bb9222 | 211 | .num_sdma_engines = 2, |
1b4670f6 | 212 | .num_xgmi_sdma_engines = 0, |
d5094189 | 213 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
214 | }; |
215 | ||
216 | ||
217 | static const struct kfd_device_info polaris10_device_info = { | |
218 | .asic_family = CHIP_POLARIS10, | |
c181159a | 219 | .asic_name = "polaris10", |
a3084e6c FK |
220 | .max_pasid_bits = 16, |
221 | .max_no_of_hqd = 24, | |
ada2b29c | 222 | .doorbell_size = 4, |
a3084e6c FK |
223 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
224 | .event_interrupt_class = &event_interrupt_class_cik, | |
225 | .num_of_watch_points = 4, | |
226 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
227 | .supports_cwsr = true, | |
64d1c3a4 | 228 | .needs_iommu_device = false, |
a3084e6c | 229 | .needs_pci_atomics = true, |
98bb9222 | 230 | .num_sdma_engines = 2, |
1b4670f6 | 231 | .num_xgmi_sdma_engines = 0, |
d5094189 | 232 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
233 | }; |
234 | ||
235 | static const struct kfd_device_info polaris10_vf_device_info = { | |
236 | .asic_family = CHIP_POLARIS10, | |
c181159a | 237 | .asic_name = "polaris10", |
a3084e6c FK |
238 | .max_pasid_bits = 16, |
239 | .max_no_of_hqd = 24, | |
ada2b29c | 240 | .doorbell_size = 4, |
a3084e6c FK |
241 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
242 | .event_interrupt_class = &event_interrupt_class_cik, | |
243 | .num_of_watch_points = 4, | |
244 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
245 | .supports_cwsr = true, | |
64d1c3a4 | 246 | .needs_iommu_device = false, |
a3084e6c | 247 | .needs_pci_atomics = false, |
98bb9222 | 248 | .num_sdma_engines = 2, |
1b4670f6 | 249 | .num_xgmi_sdma_engines = 0, |
d5094189 | 250 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
251 | }; |
252 | ||
253 | static const struct kfd_device_info polaris11_device_info = { | |
254 | .asic_family = CHIP_POLARIS11, | |
c181159a | 255 | .asic_name = "polaris11", |
a3084e6c FK |
256 | .max_pasid_bits = 16, |
257 | .max_no_of_hqd = 24, | |
ada2b29c | 258 | .doorbell_size = 4, |
a3084e6c FK |
259 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
260 | .event_interrupt_class = &event_interrupt_class_cik, | |
261 | .num_of_watch_points = 4, | |
262 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
263 | .supports_cwsr = true, | |
64d1c3a4 | 264 | .needs_iommu_device = false, |
a3084e6c | 265 | .needs_pci_atomics = true, |
98bb9222 | 266 | .num_sdma_engines = 2, |
1b4670f6 | 267 | .num_xgmi_sdma_engines = 0, |
d5094189 | 268 | .num_sdma_queues_per_engine = 2, |
a3084e6c FK |
269 | }; |
270 | ||
846a44d7 GB |
271 | static const struct kfd_device_info polaris12_device_info = { |
272 | .asic_family = CHIP_POLARIS12, | |
c181159a | 273 | .asic_name = "polaris12", |
846a44d7 GB |
274 | .max_pasid_bits = 16, |
275 | .max_no_of_hqd = 24, | |
276 | .doorbell_size = 4, | |
277 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | |
278 | .event_interrupt_class = &event_interrupt_class_cik, | |
279 | .num_of_watch_points = 4, | |
280 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
281 | .supports_cwsr = true, | |
282 | .needs_iommu_device = false, | |
283 | .needs_pci_atomics = true, | |
284 | .num_sdma_engines = 2, | |
1b4670f6 | 285 | .num_xgmi_sdma_engines = 0, |
846a44d7 GB |
286 | .num_sdma_queues_per_engine = 2, |
287 | }; | |
288 | ||
ed81cd6e KR |
289 | static const struct kfd_device_info vegam_device_info = { |
290 | .asic_family = CHIP_VEGAM, | |
c181159a | 291 | .asic_name = "vegam", |
ed81cd6e KR |
292 | .max_pasid_bits = 16, |
293 | .max_no_of_hqd = 24, | |
294 | .doorbell_size = 4, | |
295 | .ih_ring_entry_size = 4 * sizeof(uint32_t), | |
296 | .event_interrupt_class = &event_interrupt_class_cik, | |
297 | .num_of_watch_points = 4, | |
298 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
299 | .supports_cwsr = true, | |
300 | .needs_iommu_device = false, | |
301 | .needs_pci_atomics = true, | |
302 | .num_sdma_engines = 2, | |
303 | .num_xgmi_sdma_engines = 0, | |
846a44d7 GB |
304 | .num_sdma_queues_per_engine = 2, |
305 | }; | |
306 | ||
389056e5 FK |
307 | static const struct kfd_device_info vega10_device_info = { |
308 | .asic_family = CHIP_VEGA10, | |
c181159a | 309 | .asic_name = "vega10", |
389056e5 FK |
310 | .max_pasid_bits = 16, |
311 | .max_no_of_hqd = 24, | |
312 | .doorbell_size = 8, | |
313 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
314 | .event_interrupt_class = &event_interrupt_class_v9, | |
315 | .num_of_watch_points = 4, | |
316 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
317 | .supports_cwsr = true, | |
318 | .needs_iommu_device = false, | |
319 | .needs_pci_atomics = false, | |
98bb9222 | 320 | .num_sdma_engines = 2, |
1b4670f6 | 321 | .num_xgmi_sdma_engines = 0, |
d5094189 | 322 | .num_sdma_queues_per_engine = 2, |
389056e5 FK |
323 | }; |
324 | ||
325 | static const struct kfd_device_info vega10_vf_device_info = { | |
326 | .asic_family = CHIP_VEGA10, | |
c181159a | 327 | .asic_name = "vega10", |
389056e5 FK |
328 | .max_pasid_bits = 16, |
329 | .max_no_of_hqd = 24, | |
330 | .doorbell_size = 8, | |
331 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
332 | .event_interrupt_class = &event_interrupt_class_v9, | |
333 | .num_of_watch_points = 4, | |
334 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
335 | .supports_cwsr = true, | |
336 | .needs_iommu_device = false, | |
337 | .needs_pci_atomics = false, | |
98bb9222 | 338 | .num_sdma_engines = 2, |
1b4670f6 | 339 | .num_xgmi_sdma_engines = 0, |
d5094189 | 340 | .num_sdma_queues_per_engine = 2, |
389056e5 FK |
341 | }; |
342 | ||
846a44d7 GB |
343 | static const struct kfd_device_info vega12_device_info = { |
344 | .asic_family = CHIP_VEGA12, | |
c181159a | 345 | .asic_name = "vega12", |
846a44d7 GB |
346 | .max_pasid_bits = 16, |
347 | .max_no_of_hqd = 24, | |
348 | .doorbell_size = 8, | |
349 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
350 | .event_interrupt_class = &event_interrupt_class_v9, | |
351 | .num_of_watch_points = 4, | |
352 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
353 | .supports_cwsr = true, | |
354 | .needs_iommu_device = false, | |
355 | .needs_pci_atomics = false, | |
356 | .num_sdma_engines = 2, | |
1b4670f6 | 357 | .num_xgmi_sdma_engines = 0, |
846a44d7 GB |
358 | .num_sdma_queues_per_engine = 2, |
359 | }; | |
360 | ||
22a3a294 SL |
361 | static const struct kfd_device_info vega20_device_info = { |
362 | .asic_family = CHIP_VEGA20, | |
c181159a | 363 | .asic_name = "vega20", |
22a3a294 SL |
364 | .max_pasid_bits = 16, |
365 | .max_no_of_hqd = 24, | |
366 | .doorbell_size = 8, | |
367 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
368 | .event_interrupt_class = &event_interrupt_class_v9, | |
369 | .num_of_watch_points = 4, | |
370 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
371 | .supports_cwsr = true, | |
372 | .needs_iommu_device = false, | |
006a0b3d | 373 | .needs_pci_atomics = false, |
22a3a294 | 374 | .num_sdma_engines = 2, |
1b4670f6 | 375 | .num_xgmi_sdma_engines = 0, |
22a3a294 SL |
376 | .num_sdma_queues_per_engine = 8, |
377 | }; | |
378 | ||
49adcf8a YZ |
379 | static const struct kfd_device_info arcturus_device_info = { |
380 | .asic_family = CHIP_ARCTURUS, | |
c181159a | 381 | .asic_name = "arcturus", |
49adcf8a YZ |
382 | .max_pasid_bits = 16, |
383 | .max_no_of_hqd = 24, | |
384 | .doorbell_size = 8, | |
385 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
386 | .event_interrupt_class = &event_interrupt_class_v9, | |
387 | .num_of_watch_points = 4, | |
388 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
389 | .supports_cwsr = true, | |
390 | .needs_iommu_device = false, | |
391 | .needs_pci_atomics = false, | |
b6689cf7 OZ |
392 | .num_sdma_engines = 2, |
393 | .num_xgmi_sdma_engines = 6, | |
49adcf8a YZ |
394 | .num_sdma_queues_per_engine = 8, |
395 | }; | |
396 | ||
36e22d59 YZ |
397 | static const struct kfd_device_info aldebaran_device_info = { |
398 | .asic_family = CHIP_ALDEBARAN, | |
399 | .asic_name = "aldebaran", | |
400 | .max_pasid_bits = 16, | |
401 | .max_no_of_hqd = 24, | |
402 | .doorbell_size = 8, | |
403 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
404 | .event_interrupt_class = &event_interrupt_class_v9, | |
405 | .num_of_watch_points = 4, | |
406 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
407 | .supports_cwsr = true, | |
408 | .needs_iommu_device = false, | |
409 | .needs_pci_atomics = false, | |
410 | .num_sdma_engines = 2, | |
411 | .num_xgmi_sdma_engines = 3, | |
412 | .num_sdma_queues_per_engine = 8, | |
413 | }; | |
414 | ||
2b9c2211 HR |
415 | static const struct kfd_device_info renoir_device_info = { |
416 | .asic_family = CHIP_RENOIR, | |
acb9acbe | 417 | .asic_name = "renoir", |
2b9c2211 HR |
418 | .max_pasid_bits = 16, |
419 | .max_no_of_hqd = 24, | |
420 | .doorbell_size = 8, | |
421 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
422 | .event_interrupt_class = &event_interrupt_class_v9, | |
423 | .num_of_watch_points = 4, | |
424 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
425 | .supports_cwsr = true, | |
426 | .needs_iommu_device = false, | |
427 | .needs_pci_atomics = false, | |
428 | .num_sdma_engines = 1, | |
429 | .num_xgmi_sdma_engines = 0, | |
430 | .num_sdma_queues_per_engine = 2, | |
431 | }; | |
432 | ||
14328aa5 PC |
433 | static const struct kfd_device_info navi10_device_info = { |
434 | .asic_family = CHIP_NAVI10, | |
c181159a | 435 | .asic_name = "navi10", |
14328aa5 PC |
436 | .max_pasid_bits = 16, |
437 | .max_no_of_hqd = 24, | |
438 | .doorbell_size = 8, | |
439 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
440 | .event_interrupt_class = &event_interrupt_class_v9, | |
441 | .num_of_watch_points = 4, | |
442 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
443 | .needs_iommu_device = false, | |
444 | .supports_cwsr = true, | |
6cc980e3 | 445 | .needs_pci_atomics = true, |
14328aa5 PC |
446 | .num_sdma_engines = 2, |
447 | .num_xgmi_sdma_engines = 0, | |
448 | .num_sdma_queues_per_engine = 8, | |
449 | }; | |
450 | ||
b77fb9d8 | 451 | static const struct kfd_device_info navi12_device_info = { |
0e94b564 | 452 | .asic_family = CHIP_NAVI12, |
b77fb9d8 | 453 | .asic_name = "navi12", |
454 | .max_pasid_bits = 16, | |
455 | .max_no_of_hqd = 24, | |
456 | .doorbell_size = 8, | |
457 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
458 | .event_interrupt_class = &event_interrupt_class_v9, | |
459 | .num_of_watch_points = 4, | |
460 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
461 | .needs_iommu_device = false, | |
462 | .supports_cwsr = true, | |
6cc980e3 | 463 | .needs_pci_atomics = true, |
b77fb9d8 | 464 | .num_sdma_engines = 2, |
465 | .num_xgmi_sdma_engines = 0, | |
466 | .num_sdma_queues_per_engine = 8, | |
467 | }; | |
468 | ||
8099ae40 YZ |
469 | static const struct kfd_device_info navi14_device_info = { |
470 | .asic_family = CHIP_NAVI14, | |
471 | .asic_name = "navi14", | |
472 | .max_pasid_bits = 16, | |
473 | .max_no_of_hqd = 24, | |
474 | .doorbell_size = 8, | |
475 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
476 | .event_interrupt_class = &event_interrupt_class_v9, | |
477 | .num_of_watch_points = 4, | |
478 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
479 | .needs_iommu_device = false, | |
480 | .supports_cwsr = true, | |
6cc980e3 | 481 | .needs_pci_atomics = true, |
8099ae40 YZ |
482 | .num_sdma_engines = 2, |
483 | .num_xgmi_sdma_engines = 0, | |
484 | .num_sdma_queues_per_engine = 8, | |
485 | }; | |
486 | ||
3a2f0c81 YZ |
487 | static const struct kfd_device_info sienna_cichlid_device_info = { |
488 | .asic_family = CHIP_SIENNA_CICHLID, | |
489 | .asic_name = "sienna_cichlid", | |
490 | .max_pasid_bits = 16, | |
491 | .max_no_of_hqd = 24, | |
492 | .doorbell_size = 8, | |
493 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
494 | .event_interrupt_class = &event_interrupt_class_v9, | |
495 | .num_of_watch_points = 4, | |
496 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
497 | .needs_iommu_device = false, | |
498 | .supports_cwsr = true, | |
6cc980e3 | 499 | .needs_pci_atomics = true, |
3a2f0c81 YZ |
500 | .num_sdma_engines = 4, |
501 | .num_xgmi_sdma_engines = 0, | |
502 | .num_sdma_queues_per_engine = 8, | |
503 | }; | |
504 | ||
de89b2e4 CG |
505 | static const struct kfd_device_info navy_flounder_device_info = { |
506 | .asic_family = CHIP_NAVY_FLOUNDER, | |
507 | .asic_name = "navy_flounder", | |
508 | .max_pasid_bits = 16, | |
509 | .max_no_of_hqd = 24, | |
510 | .doorbell_size = 8, | |
511 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
512 | .event_interrupt_class = &event_interrupt_class_v9, | |
513 | .num_of_watch_points = 4, | |
514 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
515 | .needs_iommu_device = false, | |
516 | .supports_cwsr = true, | |
6cc980e3 | 517 | .needs_pci_atomics = true, |
de89b2e4 CG |
518 | .num_sdma_engines = 2, |
519 | .num_xgmi_sdma_engines = 0, | |
520 | .num_sdma_queues_per_engine = 8, | |
521 | }; | |
522 | ||
3a5e715d HR |
523 | static const struct kfd_device_info vangogh_device_info = { |
524 | .asic_family = CHIP_VANGOGH, | |
525 | .asic_name = "vangogh", | |
526 | .max_pasid_bits = 16, | |
527 | .max_no_of_hqd = 24, | |
528 | .doorbell_size = 8, | |
529 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
530 | .event_interrupt_class = &event_interrupt_class_v9, | |
531 | .num_of_watch_points = 4, | |
532 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
533 | .needs_iommu_device = false, | |
534 | .supports_cwsr = true, | |
535 | .needs_pci_atomics = false, | |
536 | .num_sdma_engines = 1, | |
537 | .num_xgmi_sdma_engines = 0, | |
538 | .num_sdma_queues_per_engine = 2, | |
539 | }; | |
540 | ||
eb5a34d4 CG |
541 | static const struct kfd_device_info dimgrey_cavefish_device_info = { |
542 | .asic_family = CHIP_DIMGREY_CAVEFISH, | |
543 | .asic_name = "dimgrey_cavefish", | |
544 | .max_pasid_bits = 16, | |
545 | .max_no_of_hqd = 24, | |
546 | .doorbell_size = 8, | |
547 | .ih_ring_entry_size = 8 * sizeof(uint32_t), | |
548 | .event_interrupt_class = &event_interrupt_class_v9, | |
549 | .num_of_watch_points = 4, | |
550 | .mqd_size_aligned = MQD_SIZE_ALIGNED, | |
551 | .needs_iommu_device = false, | |
552 | .supports_cwsr = true, | |
6cc980e3 | 553 | .needs_pci_atomics = true, |
eb5a34d4 CG |
554 | .num_sdma_engines = 2, |
555 | .num_xgmi_sdma_engines = 0, | |
556 | .num_sdma_queues_per_engine = 8, | |
557 | }; | |
558 | ||
559 | ||
050091ab YZ |
560 | /* For each entry, [0] is regular and [1] is virtualisation device. */ |
561 | static const struct kfd_device_info *kfd_supported_devices[][2] = { | |
95a5bd1b | 562 | #ifdef KFD_SUPPORT_IOMMU_V2 |
050091ab | 563 | [CHIP_KAVERI] = {&kaveri_device_info, NULL}, |
95a5bd1b | 564 | [CHIP_CARRIZO] = {&carrizo_device_info, NULL}, |
95a5bd1b | 565 | #endif |
2b3bbf23 | 566 | [CHIP_RAVEN] = {&raven_device_info, NULL}, |
050091ab YZ |
567 | [CHIP_HAWAII] = {&hawaii_device_info, NULL}, |
568 | [CHIP_TONGA] = {&tonga_device_info, NULL}, | |
569 | [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info}, | |
050091ab YZ |
570 | [CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info}, |
571 | [CHIP_POLARIS11] = {&polaris11_device_info, NULL}, | |
572 | [CHIP_POLARIS12] = {&polaris12_device_info, NULL}, | |
573 | [CHIP_VEGAM] = {&vegam_device_info, NULL}, | |
574 | [CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info}, | |
575 | [CHIP_VEGA12] = {&vega12_device_info, NULL}, | |
576 | [CHIP_VEGA20] = {&vega20_device_info, NULL}, | |
2b9c2211 | 577 | [CHIP_RENOIR] = {&renoir_device_info, NULL}, |
050091ab | 578 | [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info}, |
36e22d59 | 579 | [CHIP_ALDEBARAN] = {&aldebaran_device_info, NULL}, |
050091ab | 580 | [CHIP_NAVI10] = {&navi10_device_info, NULL}, |
b77fb9d8 | 581 | [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info}, |
8099ae40 | 582 | [CHIP_NAVI14] = {&navi14_device_info, NULL}, |
adab4dad | 583 | [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info}, |
de89b2e4 | 584 | [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info}, |
3a5e715d | 585 | [CHIP_VANGOGH] = {&vangogh_device_info, NULL}, |
eb5a34d4 | 586 | [CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info}, |
4a488a7a OG |
587 | }; |
588 | ||
6e81090b OG |
589 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, |
590 | unsigned int chunk_size); | |
591 | static void kfd_gtt_sa_fini(struct kfd_dev *kfd); | |
592 | ||
b8935a7c YZ |
593 | static int kfd_resume(struct kfd_dev *kfd); |
594 | ||
050091ab | 595 | struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, |
e392c887 | 596 | struct pci_dev *pdev, unsigned int asic_type, bool vf) |
4a488a7a | 597 | { |
050091ab YZ |
598 | struct kfd_dev *kfd; |
599 | const struct kfd_device_info *device_info; | |
e392c887 | 600 | const struct kfd2kgd_calls *f2g; |
4a488a7a | 601 | |
e392c887 YZ |
602 | if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2) |
603 | || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) { | |
050091ab YZ |
604 | dev_err(kfd_device, "asic_type %d out of range\n", asic_type); |
605 | return NULL; /* asic_type out of range */ | |
4a488a7a OG |
606 | } |
607 | ||
050091ab | 608 | device_info = kfd_supported_devices[asic_type][vf]; |
e392c887 | 609 | f2g = kfd2kgd_funcs[asic_type]; |
4a488a7a | 610 | |
aa5e899d | 611 | if (!device_info || !f2g) { |
050091ab YZ |
612 | dev_err(kfd_device, "%s %s not supported in kfd\n", |
613 | amdgpu_asic_name[asic_type], vf ? "VF" : ""); | |
4a488a7a | 614 | return NULL; |
4ebc7182 | 615 | } |
4a488a7a | 616 | |
d35f00d8 EH |
617 | kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); |
618 | if (!kfd) | |
619 | return NULL; | |
620 | ||
6106dce9 | 621 | /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. |
622 | * 32 and 64-bit requests are possible and must be | |
623 | * supported. | |
624 | */ | |
aabf3a95 JX |
625 | kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd); |
626 | if (device_info->needs_pci_atomics && | |
627 | !kfd->pci_atomic_requested) { | |
6106dce9 | 628 | dev_info(kfd_device, |
629 | "skipped device %x:%x, PCI rejects atomics\n", | |
630 | pdev->vendor, pdev->device); | |
d35f00d8 | 631 | kfree(kfd); |
6106dce9 | 632 | return NULL; |
aabf3a95 | 633 | } |
4a488a7a OG |
634 | |
635 | kfd->kgd = kgd; | |
636 | kfd->device_info = device_info; | |
637 | kfd->pdev = pdev; | |
19f6d2a6 | 638 | kfd->init_complete = false; |
cea405b1 | 639 | kfd->kfd2kgd = f2g; |
43d8107f | 640 | atomic_set(&kfd->compute_profile, 0); |
cea405b1 XZ |
641 | |
642 | mutex_init(&kfd->doorbell_mutex); | |
643 | memset(&kfd->doorbell_available_index, 0, | |
644 | sizeof(kfd->doorbell_available_index)); | |
4a488a7a | 645 | |
9b54d201 EH |
646 | atomic_set(&kfd->sram_ecc_flag, 0); |
647 | ||
59d7115d MJ |
648 | ida_init(&kfd->doorbell_ida); |
649 | ||
4a488a7a OG |
650 | return kfd; |
651 | } | |
652 | ||
373d7080 FK |
653 | static void kfd_cwsr_init(struct kfd_dev *kfd) |
654 | { | |
655 | if (cwsr_enable && kfd->device_info->supports_cwsr) { | |
3e76c239 FK |
656 | if (kfd->device_info->asic_family < CHIP_VEGA10) { |
657 | BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); | |
658 | kfd->cwsr_isa = cwsr_trap_gfx8_hex; | |
659 | kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); | |
0ef6845c | 660 | } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { |
3baa24f0 OZ |
661 | BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); |
662 | kfd->cwsr_isa = cwsr_trap_arcturus_hex; | |
663 | kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); | |
0ef6845c JC |
664 | } else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { |
665 | BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE); | |
666 | kfd->cwsr_isa = cwsr_trap_aldebaran_hex; | |
667 | kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); | |
14328aa5 | 668 | } else if (kfd->device_info->asic_family < CHIP_NAVI10) { |
3e76c239 FK |
669 | BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); |
670 | kfd->cwsr_isa = cwsr_trap_gfx9_hex; | |
671 | kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); | |
80b6cfed JC |
672 | } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { |
673 | BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE); | |
674 | kfd->cwsr_isa = cwsr_trap_nv1x_hex; | |
675 | kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); | |
14328aa5 PC |
676 | } else { |
677 | BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE); | |
678 | kfd->cwsr_isa = cwsr_trap_gfx10_hex; | |
679 | kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); | |
3e76c239 | 680 | } |
373d7080 | 681 | |
373d7080 FK |
682 | kfd->cwsr_enabled = true; |
683 | } | |
684 | } | |
685 | ||
29633d0e JG |
686 | static int kfd_gws_init(struct kfd_dev *kfd) |
687 | { | |
688 | int ret = 0; | |
689 | ||
690 | if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) | |
691 | return 0; | |
692 | ||
693 | if (hws_gws_support | |
fea7d919 JG |
694 | || (kfd->device_info->asic_family == CHIP_VEGA10 |
695 | && kfd->mec2_fw_version >= 0x81b3) | |
696 | || (kfd->device_info->asic_family >= CHIP_VEGA12 | |
29633d0e | 697 | && kfd->device_info->asic_family <= CHIP_RAVEN |
fea7d919 JG |
698 | && kfd->mec2_fw_version >= 0x1b3) |
699 | || (kfd->device_info->asic_family == CHIP_ARCTURUS | |
700 | && kfd->mec2_fw_version >= 0x30)) | |
29633d0e JG |
701 | ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, |
702 | amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); | |
703 | ||
704 | return ret; | |
705 | } | |
706 | ||
938a0650 AL |
707 | static void kfd_smi_init(struct kfd_dev *dev) { |
708 | INIT_LIST_HEAD(&dev->smi_clients); | |
709 | spin_lock_init(&dev->smi_lock); | |
710 | } | |
711 | ||
4a488a7a | 712 | bool kgd2kfd_device_init(struct kfd_dev *kfd, |
3a0c3423 | 713 | struct drm_device *ddev, |
4a488a7a OG |
714 | const struct kgd2kfd_shared_resources *gpu_resources) |
715 | { | |
19f6d2a6 OG |
716 | unsigned int size; |
717 | ||
3a0c3423 | 718 | kfd->ddev = ddev; |
0da8b10e | 719 | kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, |
5ade6c9c | 720 | KGD_ENGINE_MEC1); |
29633d0e JG |
721 | kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, |
722 | KGD_ENGINE_MEC2); | |
0da8b10e | 723 | kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, |
5ade6c9c | 724 | KGD_ENGINE_SDMA1); |
4a488a7a OG |
725 | kfd->shared_resources = *gpu_resources; |
726 | ||
44008d7a YZ |
727 | kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; |
728 | kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; | |
729 | kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd | |
730 | - kfd->vm_info.first_vmid_kfd + 1; | |
731 | ||
a99c6d4f FK |
732 | /* Verify module parameters regarding mapped process number*/ |
733 | if ((hws_max_conc_proc < 0) | |
734 | || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { | |
735 | dev_err(kfd_device, | |
736 | "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", | |
737 | hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, | |
738 | kfd->vm_info.vmid_num_kfd); | |
739 | kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; | |
740 | } else | |
741 | kfd->max_proc_per_quantum = hws_max_conc_proc; | |
742 | ||
19f6d2a6 | 743 | /* calculate max size of mqds needed for queues */ |
b8cbab04 OG |
744 | size = max_num_of_queues_per_device * |
745 | kfd->device_info->mqd_size_aligned; | |
19f6d2a6 | 746 | |
e18e794e OG |
747 | /* |
748 | * calculate max size of runlist packet. | |
749 | * There can be only 2 packets at once | |
750 | */ | |
507968dd FK |
751 | size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) + |
752 | max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues) | |
753 | + sizeof(struct pm4_mes_runlist)) * 2; | |
e18e794e OG |
754 | |
755 | /* Add size of HIQ & DIQ */ | |
756 | size += KFD_KERNEL_QUEUE_SIZE * 2; | |
757 | ||
758 | /* add another 512KB for all other allocations on gart (HPD, fences) */ | |
19f6d2a6 OG |
759 | size += 512 * 1024; |
760 | ||
7cd52c91 | 761 | if (amdgpu_amdkfd_alloc_gtt_mem( |
cea405b1 | 762 | kfd->kgd, size, &kfd->gtt_mem, |
15426dbb YZ |
763 | &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, |
764 | false)) { | |
79775b62 | 765 | dev_err(kfd_device, "Could not allocate %d bytes\n", size); |
e09d4fc8 | 766 | goto alloc_gtt_mem_failure; |
19f6d2a6 OG |
767 | } |
768 | ||
79775b62 | 769 | dev_info(kfd_device, "Allocated %d bytes on gart\n", size); |
e18e794e | 770 | |
73a1da0b OG |
771 | /* Initialize GTT sa with 512 byte chunk size */ |
772 | if (kfd_gtt_sa_init(kfd, size, 512) != 0) { | |
79775b62 | 773 | dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); |
73a1da0b OG |
774 | goto kfd_gtt_sa_init_error; |
775 | } | |
776 | ||
735df2ba FK |
777 | if (kfd_doorbell_init(kfd)) { |
778 | dev_err(kfd_device, | |
779 | "Error initializing doorbell aperture\n"); | |
780 | goto kfd_doorbell_error; | |
781 | } | |
19f6d2a6 | 782 | |
332f6e1e | 783 | kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); |
0c1690e3 | 784 | |
9b498efa AD |
785 | kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); |
786 | ||
2249d558 | 787 | if (kfd_interrupt_init(kfd)) { |
79775b62 | 788 | dev_err(kfd_device, "Error initializing interrupts\n"); |
2249d558 AL |
789 | goto kfd_interrupt_error; |
790 | } | |
791 | ||
64c7f8cf BG |
792 | kfd->dqm = device_queue_manager_init(kfd); |
793 | if (!kfd->dqm) { | |
79775b62 | 794 | dev_err(kfd_device, "Error initializing queue manager\n"); |
64c7f8cf BG |
795 | goto device_queue_manager_error; |
796 | } | |
797 | ||
29633d0e JG |
798 | /* If supported on this device, allocate global GWS that is shared |
799 | * by all KFD processes | |
800 | */ | |
801 | if (kfd_gws_init(kfd)) { | |
802 | dev_err(kfd_device, "Could not allocate %d gws\n", | |
803 | amdgpu_amdkfd_get_num_gws(kfd->kgd)); | |
804 | goto gws_error; | |
805 | } | |
806 | ||
6127896f HR |
807 | /* If CRAT is broken, won't set iommu enabled */ |
808 | kfd_double_confirm_iommu_support(kfd); | |
809 | ||
64d1c3a4 FK |
810 | if (kfd_iommu_device_init(kfd)) { |
811 | dev_err(kfd_device, "Error initializing iommuv2\n"); | |
812 | goto device_iommu_error; | |
64c7f8cf BG |
813 | } |
814 | ||
373d7080 FK |
815 | kfd_cwsr_init(kfd); |
816 | ||
b8935a7c YZ |
817 | if (kfd_resume(kfd)) |
818 | goto kfd_resume_error; | |
819 | ||
fbeb661b YS |
820 | kfd->dbgmgr = NULL; |
821 | ||
465ab9e0 OZ |
822 | if (kfd_topology_add_device(kfd)) { |
823 | dev_err(kfd_device, "Error adding device to topology\n"); | |
824 | goto kfd_topology_add_device_error; | |
825 | } | |
826 | ||
938a0650 AL |
827 | kfd_smi_init(kfd); |
828 | ||
4a488a7a | 829 | kfd->init_complete = true; |
79775b62 | 830 | dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, |
4a488a7a OG |
831 | kfd->pdev->device); |
832 | ||
79775b62 | 833 | pr_debug("Starting kfd with the following scheduling policy %d\n", |
d146c5a7 | 834 | kfd->dqm->sched_policy); |
64c7f8cf | 835 | |
19f6d2a6 OG |
836 | goto out; |
837 | ||
465ab9e0 | 838 | kfd_topology_add_device_error: |
b8935a7c | 839 | kfd_resume_error: |
64d1c3a4 | 840 | device_iommu_error: |
29633d0e | 841 | gws_error: |
64c7f8cf BG |
842 | device_queue_manager_uninit(kfd->dqm); |
843 | device_queue_manager_error: | |
2249d558 AL |
844 | kfd_interrupt_exit(kfd); |
845 | kfd_interrupt_error: | |
735df2ba FK |
846 | kfd_doorbell_fini(kfd); |
847 | kfd_doorbell_error: | |
73a1da0b OG |
848 | kfd_gtt_sa_fini(kfd); |
849 | kfd_gtt_sa_init_error: | |
7cd52c91 | 850 | amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); |
e09d4fc8 | 851 | alloc_gtt_mem_failure: |
29633d0e | 852 | if (kfd->gws) |
e09d4fc8 | 853 | amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); |
19f6d2a6 | 854 | dev_err(kfd_device, |
79775b62 | 855 | "device %x:%x NOT added due to errors\n", |
19f6d2a6 OG |
856 | kfd->pdev->vendor, kfd->pdev->device); |
857 | out: | |
858 | return kfd->init_complete; | |
4a488a7a OG |
859 | } |
860 | ||
861 | void kgd2kfd_device_exit(struct kfd_dev *kfd) | |
862 | { | |
b17f068a | 863 | if (kfd->init_complete) { |
9593f4d6 | 864 | kgd2kfd_suspend(kfd, false); |
64c7f8cf | 865 | device_queue_manager_uninit(kfd->dqm); |
2249d558 | 866 | kfd_interrupt_exit(kfd); |
b17f068a | 867 | kfd_topology_remove_device(kfd); |
735df2ba | 868 | kfd_doorbell_fini(kfd); |
59d7115d | 869 | ida_destroy(&kfd->doorbell_ida); |
73a1da0b | 870 | kfd_gtt_sa_fini(kfd); |
7cd52c91 | 871 | amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); |
29633d0e | 872 | if (kfd->gws) |
e09d4fc8 | 873 | amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); |
b17f068a | 874 | } |
5b5c4e40 | 875 | |
4a488a7a OG |
876 | kfree(kfd); |
877 | } | |
878 | ||
e3b7a967 SL |
879 | int kgd2kfd_pre_reset(struct kfd_dev *kfd) |
880 | { | |
e42051d2 SL |
881 | if (!kfd->init_complete) |
882 | return 0; | |
09c34e8d | 883 | |
55977744 MJ |
884 | kfd_smi_event_update_gpu_reset(kfd, false); |
885 | ||
09c34e8d FK |
886 | kfd->dqm->ops.pre_reset(kfd->dqm); |
887 | ||
9593f4d6 | 888 | kgd2kfd_suspend(kfd, false); |
e42051d2 | 889 | |
e42051d2 | 890 | kfd_signal_reset_event(kfd); |
e3b7a967 SL |
891 | return 0; |
892 | } | |
893 | ||
e42051d2 SL |
/*
 * FIXME: KFD is not yet able to resume existing processes after a reset.
 * All existing processes are kept in an evicted state until they are
 * terminated.
 */
899 | ||
e3b7a967 SL |
900 | int kgd2kfd_post_reset(struct kfd_dev *kfd) |
901 | { | |
a1bd079f | 902 | int ret; |
e42051d2 SL |
903 | |
904 | if (!kfd->init_complete) | |
905 | return 0; | |
906 | ||
e42051d2 SL |
907 | ret = kfd_resume(kfd); |
908 | if (ret) | |
909 | return ret; | |
a1bd079f | 910 | atomic_dec(&kfd_locked); |
9b54d201 EH |
911 | |
912 | atomic_set(&kfd->sram_ecc_flag, 0); | |
913 | ||
55977744 MJ |
914 | kfd_smi_event_update_gpu_reset(kfd, true); |
915 | ||
e3b7a967 SL |
916 | return 0; |
917 | } | |
918 | ||
e42051d2 SL |
919 | bool kfd_is_locked(void) |
920 | { | |
921 | return (atomic_read(&kfd_locked) > 0); | |
922 | } | |
923 | ||
9593f4d6 | 924 | void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) |
4a488a7a | 925 | { |
733fa1f7 YZ |
926 | if (!kfd->init_complete) |
927 | return; | |
928 | ||
9593f4d6 RB |
929 | /* for runtime suspend, skip locking kfd */ |
930 | if (!run_pm) { | |
931 | /* For first KFD device suspend all the KFD processes */ | |
932 | if (atomic_inc_return(&kfd_locked) == 1) | |
933 | kfd_suspend_all_processes(); | |
934 | } | |
26103436 | 935 | |
733fa1f7 | 936 | kfd->dqm->ops.stop(kfd->dqm); |
64d1c3a4 | 937 | kfd_iommu_suspend(kfd); |
4a488a7a OG |
938 | } |
939 | ||
9593f4d6 | 940 | int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) |
4a488a7a | 941 | { |
26103436 FK |
942 | int ret, count; |
943 | ||
b8935a7c YZ |
944 | if (!kfd->init_complete) |
945 | return 0; | |
b17f068a | 946 | |
26103436 FK |
947 | ret = kfd_resume(kfd); |
948 | if (ret) | |
949 | return ret; | |
950 | ||
9593f4d6 RB |
951 | /* for runtime resume, skip unlocking kfd */ |
952 | if (!run_pm) { | |
953 | count = atomic_dec_return(&kfd_locked); | |
954 | WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); | |
955 | if (count == 0) | |
956 | ret = kfd_resume_all_processes(); | |
957 | } | |
b17f068a | 958 | |
26103436 | 959 | return ret; |
b8935a7c YZ |
960 | } |
961 | ||
962 | static int kfd_resume(struct kfd_dev *kfd) | |
963 | { | |
964 | int err = 0; | |
b8935a7c | 965 | |
64d1c3a4 FK |
966 | err = kfd_iommu_resume(kfd); |
967 | if (err) { | |
968 | dev_err(kfd_device, | |
969 | "Failed to resume IOMMU for device %x:%x\n", | |
970 | kfd->pdev->vendor, kfd->pdev->device); | |
971 | return err; | |
972 | } | |
733fa1f7 | 973 | |
b8935a7c YZ |
974 | err = kfd->dqm->ops.start(kfd->dqm); |
975 | if (err) { | |
976 | dev_err(kfd_device, | |
977 | "Error starting queue manager for device %x:%x\n", | |
978 | kfd->pdev->vendor, kfd->pdev->device); | |
979 | goto dqm_start_error; | |
b17f068a OG |
980 | } |
981 | ||
b8935a7c YZ |
982 | return err; |
983 | ||
984 | dqm_start_error: | |
64d1c3a4 | 985 | kfd_iommu_suspend(kfd); |
b8935a7c | 986 | return err; |
4a488a7a OG |
987 | } |
988 | ||
b3eca59d PY |
989 | static inline void kfd_queue_work(struct workqueue_struct *wq, |
990 | struct work_struct *work) | |
991 | { | |
992 | int cpu, new_cpu; | |
993 | ||
994 | cpu = new_cpu = smp_processor_id(); | |
995 | do { | |
996 | new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; | |
997 | if (cpu_to_node(new_cpu) == numa_node_id()) | |
998 | break; | |
999 | } while (cpu != new_cpu); | |
1000 | ||
1001 | queue_work_on(new_cpu, wq, work); | |
1002 | } | |
1003 | ||
b3f5e6b4 AL |
1004 | /* This is called directly from KGD at ISR. */ |
1005 | void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) | |
4a488a7a | 1006 | { |
58e69886 LX |
1007 | uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; |
1008 | bool is_patched = false; | |
2383a767 | 1009 | unsigned long flags; |
58e69886 | 1010 | |
2249d558 AL |
1011 | if (!kfd->init_complete) |
1012 | return; | |
1013 | ||
58e69886 LX |
1014 | if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { |
1015 | dev_err_once(kfd_device, "Ring entry too small\n"); | |
1016 | return; | |
1017 | } | |
1018 | ||
2383a767 | 1019 | spin_lock_irqsave(&kfd->interrupt_lock, flags); |
2249d558 AL |
1020 | |
1021 | if (kfd->interrupts_active | |
58e69886 LX |
1022 | && interrupt_is_wanted(kfd, ih_ring_entry, |
1023 | patched_ihre, &is_patched) | |
1024 | && enqueue_ih_ring_entry(kfd, | |
1025 | is_patched ? patched_ihre : ih_ring_entry)) | |
b3eca59d | 1026 | kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); |
2249d558 | 1027 | |
2383a767 | 1028 | spin_unlock_irqrestore(&kfd->interrupt_lock, flags); |
4a488a7a | 1029 | } |
6e81090b | 1030 | |
6b95e797 FK |
1031 | int kgd2kfd_quiesce_mm(struct mm_struct *mm) |
1032 | { | |
1033 | struct kfd_process *p; | |
1034 | int r; | |
1035 | ||
1036 | /* Because we are called from arbitrary context (workqueue) as opposed | |
1037 | * to process context, kfd_process could attempt to exit while we are | |
1038 | * running so the lookup function increments the process ref count. | |
1039 | */ | |
1040 | p = kfd_lookup_process_by_mm(mm); | |
1041 | if (!p) | |
1042 | return -ESRCH; | |
1043 | ||
b2057956 | 1044 | WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); |
6b95e797 FK |
1045 | r = kfd_process_evict_queues(p); |
1046 | ||
1047 | kfd_unref_process(p); | |
1048 | return r; | |
1049 | } | |
1050 | ||
1051 | int kgd2kfd_resume_mm(struct mm_struct *mm) | |
1052 | { | |
1053 | struct kfd_process *p; | |
1054 | int r; | |
1055 | ||
1056 | /* Because we are called from arbitrary context (workqueue) as opposed | |
1057 | * to process context, kfd_process could attempt to exit while we are | |
1058 | * running so the lookup function increments the process ref count. | |
1059 | */ | |
1060 | p = kfd_lookup_process_by_mm(mm); | |
1061 | if (!p) | |
1062 | return -ESRCH; | |
1063 | ||
1064 | r = kfd_process_restore_queues(p); | |
1065 | ||
1066 | kfd_unref_process(p); | |
1067 | return r; | |
1068 | } | |
1069 | ||
26103436 FK |
1070 | /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will |
1071 | * prepare for safe eviction of KFD BOs that belong to the specified | |
1072 | * process. | |
1073 | * | |
1074 | * @mm: mm_struct that identifies the specified KFD process | |
1075 | * @fence: eviction fence attached to KFD process BOs | |
1076 | * | |
1077 | */ | |
1078 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, | |
1079 | struct dma_fence *fence) | |
1080 | { | |
1081 | struct kfd_process *p; | |
1082 | unsigned long active_time; | |
1083 | unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS); | |
1084 | ||
1085 | if (!fence) | |
1086 | return -EINVAL; | |
1087 | ||
1088 | if (dma_fence_is_signaled(fence)) | |
1089 | return 0; | |
1090 | ||
1091 | p = kfd_lookup_process_by_mm(mm); | |
1092 | if (!p) | |
1093 | return -ENODEV; | |
1094 | ||
1095 | if (fence->seqno == p->last_eviction_seqno) | |
1096 | goto out; | |
1097 | ||
1098 | p->last_eviction_seqno = fence->seqno; | |
1099 | ||
1100 | /* Avoid KFD process starvation. Wait for at least | |
1101 | * PROCESS_ACTIVE_TIME_MS before evicting the process again | |
1102 | */ | |
1103 | active_time = get_jiffies_64() - p->last_restore_timestamp; | |
1104 | if (delay_jiffies > active_time) | |
1105 | delay_jiffies -= active_time; | |
1106 | else | |
1107 | delay_jiffies = 0; | |
1108 | ||
1109 | /* During process initialization eviction_work.dwork is initialized | |
1110 | * to kfd_evict_bo_worker | |
1111 | */ | |
b2057956 FK |
1112 | WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies", |
1113 | p->lead_thread->pid, delay_jiffies); | |
26103436 FK |
1114 | schedule_delayed_work(&p->eviction_work, delay_jiffies); |
1115 | out: | |
1116 | kfd_unref_process(p); | |
1117 | return 0; | |
1118 | } | |
1119 | ||
6e81090b OG |
1120 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, |
1121 | unsigned int chunk_size) | |
1122 | { | |
8625ff9c | 1123 | unsigned int num_of_longs; |
6e81090b | 1124 | |
32fa8219 FK |
1125 | if (WARN_ON(buf_size < chunk_size)) |
1126 | return -EINVAL; | |
1127 | if (WARN_ON(buf_size == 0)) | |
1128 | return -EINVAL; | |
1129 | if (WARN_ON(chunk_size == 0)) | |
1130 | return -EINVAL; | |
6e81090b OG |
1131 | |
1132 | kfd->gtt_sa_chunk_size = chunk_size; | |
1133 | kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; | |
1134 | ||
8625ff9c FK |
1135 | num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / |
1136 | BITS_PER_LONG; | |
6e81090b | 1137 | |
8625ff9c | 1138 | kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); |
6e81090b OG |
1139 | |
1140 | if (!kfd->gtt_sa_bitmap) | |
1141 | return -ENOMEM; | |
1142 | ||
79775b62 | 1143 | pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", |
6e81090b OG |
1144 | kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); |
1145 | ||
1146 | mutex_init(&kfd->gtt_sa_lock); | |
1147 | ||
1148 | return 0; | |
1149 | ||
1150 | } | |
1151 | ||
1152 | static void kfd_gtt_sa_fini(struct kfd_dev *kfd) | |
1153 | { | |
1154 | mutex_destroy(&kfd->gtt_sa_lock); | |
1155 | kfree(kfd->gtt_sa_bitmap); | |
1156 | } | |
1157 | ||
/*
 * kfd_gtt_sa_calc_gpu_addr - GPU address of a sub-allocator chunk
 * @start_addr: GPU address of chunk 0
 * @bit_num: chunk index
 * @chunk_size: chunk size in bytes
 *
 * Return: @start_addr plus the byte offset of chunk @bit_num. The offset is
 * computed in 64 bits so that it cannot wrap at 4 GiB before being added to
 * the base.
 */
static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + (uint64_t)bit_num * chunk_size;
}
1164 | ||
/*
 * kfd_gtt_sa_calc_cpu_addr - CPU pointer to a sub-allocator chunk
 * @start_addr: CPU pointer to chunk 0
 * @bit_num: chunk index
 * @chunk_size: chunk size in bytes
 *
 * Return: @start_addr advanced by the byte offset of chunk @bit_num. The
 * arithmetic is done in uintptr_t, which matches the pointer width on every
 * build and keeps the offset from wrapping in 32-bit unsigned int.
 */
static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						 unsigned int bit_num,
						 unsigned int chunk_size)
{
	return (uint32_t *)((uintptr_t)start_addr +
			    (uintptr_t)bit_num * chunk_size);
}
1171 | ||
1172 | int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, | |
1173 | struct kfd_mem_obj **mem_obj) | |
1174 | { | |
1175 | unsigned int found, start_search, cur_size; | |
1176 | ||
6e81090b OG |
1177 | if (size == 0) |
1178 | return -EINVAL; | |
1179 | ||
1180 | if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) | |
1181 | return -ENOMEM; | |
1182 | ||
1cd106ec FK |
1183 | *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); |
1184 | if (!(*mem_obj)) | |
6e81090b OG |
1185 | return -ENOMEM; |
1186 | ||
79775b62 | 1187 | pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size); |
6e81090b OG |
1188 | |
1189 | start_search = 0; | |
1190 | ||
1191 | mutex_lock(&kfd->gtt_sa_lock); | |
1192 | ||
1193 | kfd_gtt_restart_search: | |
1194 | /* Find the first chunk that is free */ | |
1195 | found = find_next_zero_bit(kfd->gtt_sa_bitmap, | |
1196 | kfd->gtt_sa_num_of_chunks, | |
1197 | start_search); | |
1198 | ||
79775b62 | 1199 | pr_debug("Found = %d\n", found); |
6e81090b OG |
1200 | |
1201 | /* If there wasn't any free chunk, bail out */ | |
1202 | if (found == kfd->gtt_sa_num_of_chunks) | |
1203 | goto kfd_gtt_no_free_chunk; | |
1204 | ||
1205 | /* Update fields of mem_obj */ | |
1206 | (*mem_obj)->range_start = found; | |
1207 | (*mem_obj)->range_end = found; | |
1208 | (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( | |
1209 | kfd->gtt_start_gpu_addr, | |
1210 | found, | |
1211 | kfd->gtt_sa_chunk_size); | |
1212 | (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( | |
1213 | kfd->gtt_start_cpu_ptr, | |
1214 | found, | |
1215 | kfd->gtt_sa_chunk_size); | |
1216 | ||
79775b62 | 1217 | pr_debug("gpu_addr = %p, cpu_addr = %p\n", |
6e81090b OG |
1218 | (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); |
1219 | ||
1220 | /* If we need only one chunk, mark it as allocated and get out */ | |
1221 | if (size <= kfd->gtt_sa_chunk_size) { | |
79775b62 | 1222 | pr_debug("Single bit\n"); |
6e81090b OG |
1223 | set_bit(found, kfd->gtt_sa_bitmap); |
1224 | goto kfd_gtt_out; | |
1225 | } | |
1226 | ||
1227 | /* Otherwise, try to see if we have enough contiguous chunks */ | |
1228 | cur_size = size - kfd->gtt_sa_chunk_size; | |
1229 | do { | |
1230 | (*mem_obj)->range_end = | |
1231 | find_next_zero_bit(kfd->gtt_sa_bitmap, | |
1232 | kfd->gtt_sa_num_of_chunks, ++found); | |
1233 | /* | |
1234 | * If next free chunk is not contiguous than we need to | |
1235 | * restart our search from the last free chunk we found (which | |
1236 | * wasn't contiguous to the previous ones | |
1237 | */ | |
1238 | if ((*mem_obj)->range_end != found) { | |
1239 | start_search = found; | |
1240 | goto kfd_gtt_restart_search; | |
1241 | } | |
1242 | ||
1243 | /* | |
1244 | * If we reached end of buffer, bail out with error | |
1245 | */ | |
1246 | if (found == kfd->gtt_sa_num_of_chunks) | |
1247 | goto kfd_gtt_no_free_chunk; | |
1248 | ||
1249 | /* Check if we don't need another chunk */ | |
1250 | if (cur_size <= kfd->gtt_sa_chunk_size) | |
1251 | cur_size = 0; | |
1252 | else | |
1253 | cur_size -= kfd->gtt_sa_chunk_size; | |
1254 | ||
1255 | } while (cur_size > 0); | |
1256 | ||
79775b62 | 1257 | pr_debug("range_start = %d, range_end = %d\n", |
6e81090b OG |
1258 | (*mem_obj)->range_start, (*mem_obj)->range_end); |
1259 | ||
1260 | /* Mark the chunks as allocated */ | |
1261 | for (found = (*mem_obj)->range_start; | |
1262 | found <= (*mem_obj)->range_end; | |
1263 | found++) | |
1264 | set_bit(found, kfd->gtt_sa_bitmap); | |
1265 | ||
1266 | kfd_gtt_out: | |
1267 | mutex_unlock(&kfd->gtt_sa_lock); | |
1268 | return 0; | |
1269 | ||
1270 | kfd_gtt_no_free_chunk: | |
3148a6a0 | 1271 | pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj); |
6e81090b | 1272 | mutex_unlock(&kfd->gtt_sa_lock); |
3148a6a0 | 1273 | kfree(*mem_obj); |
6e81090b OG |
1274 | return -ENOMEM; |
1275 | } | |
1276 | ||
1277 | int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) | |
1278 | { | |
1279 | unsigned int bit; | |
1280 | ||
9216ed29 OG |
1281 | /* Act like kfree when trying to free a NULL object */ |
1282 | if (!mem_obj) | |
1283 | return 0; | |
6e81090b | 1284 | |
79775b62 | 1285 | pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n", |
6e81090b OG |
1286 | mem_obj, mem_obj->range_start, mem_obj->range_end); |
1287 | ||
1288 | mutex_lock(&kfd->gtt_sa_lock); | |
1289 | ||
1290 | /* Mark the chunks as free */ | |
1291 | for (bit = mem_obj->range_start; | |
1292 | bit <= mem_obj->range_end; | |
1293 | bit++) | |
1294 | clear_bit(bit, kfd->gtt_sa_bitmap); | |
1295 | ||
1296 | mutex_unlock(&kfd->gtt_sa_lock); | |
1297 | ||
1298 | kfree(mem_obj); | |
1299 | return 0; | |
1300 | } | |
a29ec470 | 1301 | |
9b54d201 EH |
1302 | void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) |
1303 | { | |
1304 | if (kfd) | |
1305 | atomic_inc(&kfd->sram_ecc_flag); | |
1306 | } | |
1307 | ||
43d8107f HK |
1308 | void kfd_inc_compute_active(struct kfd_dev *kfd) |
1309 | { | |
1310 | if (atomic_inc_return(&kfd->compute_profile) == 1) | |
1311 | amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); | |
1312 | } | |
1313 | ||
1314 | void kfd_dec_compute_active(struct kfd_dev *kfd) | |
1315 | { | |
1316 | int count = atomic_dec_return(&kfd->compute_profile); | |
1317 | ||
1318 | if (count == 0) | |
1319 | amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); | |
1320 | WARN_ONCE(count < 0, "Compute profile ref. count error"); | |
1321 | } | |
1322 | ||
2c2b0d88 MJ |
1323 | void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) |
1324 | { | |
1325 | if (kfd) | |
1326 | kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); | |
1327 | } | |
1328 | ||
a29ec470 SL |
#if defined(CONFIG_DEBUG_FS)

/* This function will send a package to HIQ to hang the HWS
 * which will trigger a GPU reset and bring the HWS back to normal state
 *
 * Return: -EINVAL when the device does not use KFD_SCHED_POLICY_HWS,
 * otherwise the result of pm_debugfs_hang_hws() or, if that succeeds, of
 * dqm_debugfs_execute_queues().
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
	int r = 0;

	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
		/* Terminate the message so it is not merged with the next log line */
		pr_err("HWS is not enabled\n");
		return -EINVAL;
	}

	r = pm_debugfs_hang_hws(&dev->dqm->packets);
	if (!r)
		r = dqm_debugfs_execute_queues(dev->dqm);

	return r;
}

#endif