Commit | Line | Data |
---|---|---|
d38ceaf9 AD |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * Copyright 2008 Red Hat Inc. | |
4 | * Copyright 2009 Jerome Glisse. | |
5 | * | |
6 | * Permission is hereby granted, free of charge, to any person obtaining a | |
7 | * copy of this software and associated documentation files (the "Software"), | |
8 | * to deal in the Software without restriction, including without limitation | |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
10 | * and/or sell copies of the Software, and to permit persons to whom the | |
11 | * Software is furnished to do so, subject to the following conditions: | |
12 | * | |
13 | * The above copyright notice and this permission notice shall be included in | |
14 | * all copies or substantial portions of the Software. | |
15 | * | |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
22 | * OTHER DEALINGS IN THE SOFTWARE. | |
23 | * | |
24 | */ | |
25 | #include <drm/drmP.h> | |
26 | #include "amdgpu.h" | |
356aee30 | 27 | #include "amdgpu_gfx.h" |
d38ceaf9 AD |
28 | |
29 | /* | |
30 | * GPU scratch register helper functions. |
31 | */ | |
32 | /** | |
33 | * amdgpu_gfx_scratch_get - Allocate a scratch register | |
34 | * | |
35 | * @adev: amdgpu_device pointer | |
36 | * @reg: scratch register mmio offset | |
37 | * | |
38 | * Allocate a CP scratch register for use by the driver (all asics). | |
39 | * Returns 0 on success or -EINVAL on failure. | |
40 | */ | |
41 | int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg) | |
42 | { | |
43 | int i; | |
44 | ||
50261151 NW |
45 | i = ffs(adev->gfx.scratch.free_mask); |
46 | if (i != 0 && i <= adev->gfx.scratch.num_reg) { | |
47 | i--; | |
48 | adev->gfx.scratch.free_mask &= ~(1u << i); | |
49 | *reg = adev->gfx.scratch.reg_base + i; | |
50 | return 0; | |
d38ceaf9 AD |
51 | } |
52 | return -EINVAL; | |
53 | } | |
54 | ||
55 | /** | |
56 | * amdgpu_gfx_scratch_free - Free a scratch register | |
57 | * | |
58 | * @adev: amdgpu_device pointer | |
59 | * @reg: scratch register mmio offset | |
60 | * | |
61 | * Free a CP scratch register allocated for use by the driver (all asics) | |
62 | */ | |
63 | void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg) | |
64 | { | |
50261151 | 65 | adev->gfx.scratch.free_mask |= 1u << (reg - adev->gfx.scratch.reg_base); |
d38ceaf9 | 66 | } |
6f8941a2 NH |
67 | |
68 | /** | |
69 | * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter | |
70 | * | |
71 | * @mask: array in which the per-shader array disable masks will be stored | |
72 | * @max_se: number of SEs | |
73 | * @max_sh: number of SHs | |
74 | * | |
75 | * The bitmask of CUs to be disabled in the shader array determined by se and | |
76 | * sh is stored in mask[se * max_sh + sh]. | |
77 | */ | |
78 | void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh) | |
79 | { | |
80 | unsigned se, sh, cu; | |
81 | const char *p; | |
82 | ||
83 | memset(mask, 0, sizeof(*mask) * max_se * max_sh); | |
84 | ||
85 | if (!amdgpu_disable_cu || !*amdgpu_disable_cu) | |
86 | return; | |
87 | ||
88 | p = amdgpu_disable_cu; | |
89 | for (;;) { | |
90 | char *next; | |
91 | int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu); | |
92 | if (ret < 3) { | |
93 | DRM_ERROR("amdgpu: could not parse disable_cu\n"); | |
94 | return; | |
95 | } | |
96 | ||
97 | if (se < max_se && sh < max_sh && cu < 16) { | |
98 | DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu); | |
99 | mask[se * max_sh + sh] |= 1u << cu; | |
100 | } else { | |
101 | DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n", | |
102 | se, sh, cu); | |
103 | } | |
104 | ||
105 | next = strchr(p, ','); | |
106 | if (!next) | |
107 | break; | |
108 | p = next + 1; | |
109 | } | |
110 | } | |
41f6a99a | 111 | |
0f7607d4 AR |
112 | static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) |
113 | { | |
4a75aefe AR |
114 | if (amdgpu_compute_multipipe != -1) { |
115 | DRM_INFO("amdgpu: forcing compute pipe policy %d\n", | |
116 | amdgpu_compute_multipipe); | |
117 | return amdgpu_compute_multipipe == 1; | |
118 | } | |
119 | ||
0f7607d4 AR |
120 | /* FIXME: spreading the queues across pipes causes perf regressions |
121 | * on POLARIS11 compute workloads */ | |
122 | if (adev->asic_type == CHIP_POLARIS11) | |
123 | return false; | |
124 | ||
125 | return adev->gfx.mec.num_mec > 1; | |
126 | } | |
127 | ||
41f6a99a AD |
128 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) |
129 | { | |
130 | int i, queue, pipe, mec; | |
0f7607d4 | 131 | bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); |
41f6a99a AD |
132 | |
133 | /* policy for amdgpu compute queue ownership */ | |
134 | for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { | |
135 | queue = i % adev->gfx.mec.num_queue_per_pipe; | |
136 | pipe = (i / adev->gfx.mec.num_queue_per_pipe) | |
137 | % adev->gfx.mec.num_pipe_per_mec; | |
138 | mec = (i / adev->gfx.mec.num_queue_per_pipe) | |
139 | / adev->gfx.mec.num_pipe_per_mec; | |
140 | ||
141 | /* we've run out of HW */ | |
142 | if (mec >= adev->gfx.mec.num_mec) | |
143 | break; | |
144 | ||
0f7607d4 | 145 | if (multipipe_policy) { |
41f6a99a AD |
146 | /* policy: amdgpu owns the first two queues of the first MEC */ |
147 | if (mec == 0 && queue < 2) | |
148 | set_bit(i, adev->gfx.mec.queue_bitmap); | |
149 | } else { | |
150 | /* policy: amdgpu owns all queues in the first pipe */ | |
151 | if (mec == 0 && pipe == 0) | |
152 | set_bit(i, adev->gfx.mec.queue_bitmap); | |
153 | } | |
154 | } | |
155 | ||
156 | /* update the number of active compute rings */ | |
157 | adev->gfx.num_compute_rings = | |
158 | bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); | |
159 | ||
160 | /* If you hit this case and edited the policy, you probably just | |
161 | * need to increase AMDGPU_MAX_COMPUTE_RINGS */ | |
162 | if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) | |
163 | adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; | |
164 | } | |
71c37505 AD |
165 | |
166 | static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, | |
167 | struct amdgpu_ring *ring) | |
168 | { | |
169 | int queue_bit; | |
170 | int mec, pipe, queue; | |
171 | ||
172 | queue_bit = adev->gfx.mec.num_mec | |
173 | * adev->gfx.mec.num_pipe_per_mec | |
174 | * adev->gfx.mec.num_queue_per_pipe; | |
175 | ||
176 | while (queue_bit-- >= 0) { | |
177 | if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) | |
178 | continue; | |
179 | ||
180 | amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); | |
181 | ||
59fd27cd HR |
182 | /* |
183 | * 1. Using pipes 2/3 from MEC 2 seems cause problems. | |
184 | * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN | |
185 | * only can be issued on queue 0. | |
186 | */ | |
187 | if ((mec == 1 && pipe > 1) || queue != 0) | |
71c37505 AD |
188 | continue; |
189 | ||
190 | ring->me = mec + 1; | |
191 | ring->pipe = pipe; | |
192 | ring->queue = queue; | |
193 | ||
194 | return 0; | |
195 | } | |
196 | ||
197 | dev_err(adev->dev, "Failed to find a queue for KIQ\n"); | |
198 | return -EINVAL; | |
199 | } | |
200 | ||
201 | int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, | |
202 | struct amdgpu_ring *ring, | |
203 | struct amdgpu_irq_src *irq) | |
204 | { | |
205 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |
206 | int r = 0; | |
207 | ||
43ca8efa | 208 | spin_lock_init(&kiq->ring_lock); |
71c37505 | 209 | |
131b4b36 | 210 | r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); |
71c37505 AD |
211 | if (r) |
212 | return r; | |
213 | ||
214 | ring->adev = NULL; | |
215 | ring->ring_obj = NULL; | |
216 | ring->use_doorbell = true; | |
217 | ring->doorbell_index = AMDGPU_DOORBELL_KIQ; | |
218 | ||
219 | r = amdgpu_gfx_kiq_acquire(adev, ring); | |
220 | if (r) | |
221 | return r; | |
222 | ||
223 | ring->eop_gpu_addr = kiq->eop_gpu_addr; | |
2119d0db | 224 | sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); |
71c37505 AD |
225 | r = amdgpu_ring_init(adev, ring, 1024, |
226 | irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); | |
227 | if (r) | |
228 | dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); | |
229 | ||
230 | return r; | |
231 | } | |
232 | ||
233 | void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, | |
234 | struct amdgpu_irq_src *irq) | |
235 | { | |
131b4b36 | 236 | amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); |
71c37505 AD |
237 | amdgpu_ring_fini(ring); |
238 | } | |
239 | ||
240 | void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) | |
241 | { | |
242 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |
243 | ||
244 | amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); | |
245 | } | |
246 | ||
247 | int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, | |
248 | unsigned hpd_size) | |
249 | { | |
250 | int r; | |
251 | u32 *hpd; | |
252 | struct amdgpu_kiq *kiq = &adev->gfx.kiq; | |
253 | ||
254 | r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, | |
255 | AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, | |
256 | &kiq->eop_gpu_addr, (void **)&hpd); | |
257 | if (r) { | |
258 | dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); | |
259 | return r; | |
260 | } | |
261 | ||
262 | memset(hpd, 0, hpd_size); | |
263 | ||
264 | r = amdgpu_bo_reserve(kiq->eop_obj, true); | |
265 | if (unlikely(r != 0)) | |
266 | dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); | |
267 | amdgpu_bo_kunmap(kiq->eop_obj); | |
268 | amdgpu_bo_unreserve(kiq->eop_obj); | |
269 | ||
270 | return 0; | |
271 | } | |
b9683c21 AD |
272 | |
273 | /* create MQD for each compute queue */ | |
274 | int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, | |
275 | unsigned mqd_size) | |
276 | { | |
277 | struct amdgpu_ring *ring = NULL; | |
278 | int r, i; | |
279 | ||
280 | /* create MQD for KIQ */ | |
281 | ring = &adev->gfx.kiq.ring; | |
282 | if (!ring->mqd_obj) { | |
beb84102 ML |
283 | /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must |
284 | * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD | |
285 | * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for | |
286 | * KIQ MQD no matter SRIOV or Bare-metal | |
287 | */ | |
b9683c21 | 288 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, |
beb84102 | 289 | AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj, |
b9683c21 AD |
290 | &ring->mqd_gpu_addr, &ring->mqd_ptr); |
291 | if (r) { | |
292 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | |
293 | return r; | |
294 | } | |
295 | ||
296 | /* prepare MQD backup */ | |
297 | adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); | |
298 | if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) | |
299 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); | |
300 | } | |
301 | ||
302 | /* create MQD for each KCQ */ | |
303 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | |
304 | ring = &adev->gfx.compute_ring[i]; | |
305 | if (!ring->mqd_obj) { | |
306 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, | |
307 | AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, | |
308 | &ring->mqd_gpu_addr, &ring->mqd_ptr); | |
309 | if (r) { | |
310 | dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); | |
311 | return r; | |
312 | } | |
313 | ||
314 | /* prepare MQD backup */ | |
315 | adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); | |
316 | if (!adev->gfx.mec.mqd_backup[i]) | |
317 | dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); | |
318 | } | |
319 | } | |
320 | ||
321 | return 0; | |
322 | } | |
323 | ||
324 | void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) | |
325 | { | |
326 | struct amdgpu_ring *ring = NULL; | |
327 | int i; | |
328 | ||
329 | for (i = 0; i < adev->gfx.num_compute_rings; i++) { | |
330 | ring = &adev->gfx.compute_ring[i]; | |
331 | kfree(adev->gfx.mec.mqd_backup[i]); | |
332 | amdgpu_bo_free_kernel(&ring->mqd_obj, | |
333 | &ring->mqd_gpu_addr, | |
334 | &ring->mqd_ptr); | |
335 | } | |
336 | ||
337 | ring = &adev->gfx.kiq.ring; | |
338 | kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); | |
339 | amdgpu_bo_free_kernel(&ring->mqd_obj, | |
340 | &ring->mqd_gpu_addr, | |
341 | &ring->mqd_ptr); | |
342 | } |