drm/radeon: separate UVD code v3
[linux-2.6-block.git] / drivers / gpu / drm / radeon / ni.c
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "nid.h"
32 #include "atom.h"
33 #include "ni_reg.h"
34 #include "cayman_blit_shaders.h"
35 #include "radeon_ucode.h"
36 #include "clearstate_cayman.h"
37
/* Register offsets the RLC saves/restores around power events on TN
 * (Trinity/ARUBA) parts.  Presumably consumed by sumo_rlc_init()
 * (declared extern below, defined elsewhere) -- TODO confirm. */
static const u32 tn_rlc_save_restore_register_list[] =
{
	0x98fc,
	0x98f0,
	0x9834,
	0x9838,
	0x9870,
	0x9874,
	0x8a14,
	0x8b24,
	0x8bcc,
	0x8b10,
	0x8c30,
	0x8d00,
	0x8d04,
	0x8c00,
	0x8c04,
	0x8c10,
	0x8c14,
	0x8d8c,
	0x8cf0,
	0x8e38,
	0x9508,
	0x9688,
	0x9608,
	0x960c,
	0x9610,
	0x9614,
	0x88c4,
	0x8978,
	0x88d4,
	0x900c,
	0x9100,
	0x913c,
	0x90e8,
	0x9354,
	0xa008,
	0x98f8,
	0x9148,
	0x914c,
	0x3f94,
	0x98f4,
	0x9b7c,
	0x3f8c,
	0x8950,
	0x8954,
	0x8a18,
	0x8b28,
	0x9144,
	0x3f90,
	0x915c,
	0x9160,
	0x9178,
	0x917c,
	0x9180,
	0x918c,
	0x9190,
	0x9194,
	0x9198,
	0x919c,
	0x91a8,
	0x91ac,
	0x91b0,
	0x91b4,
	0x91b8,
	0x91c4,
	0x91c8,
	0x91cc,
	0x91d0,
	0x91d4,
	0x91e0,
	0x91e4,
	0x91ec,
	0x91f0,
	0x91f4,
	0x9200,
	0x9204,
	0x929c,
	0x8030,
	0x9150,
	0x9a60,
	0x920c,
	0x9210,
	0x9228,
	0x922c,
	0x9244,
	0x9248,
	0x91e8,
	0x9294,
	0x9208,
	0x9224,
	0x9240,
	0x9220,
	0x923c,
	0x9258,
	0x9744,
	0xa200,
	0xa204,
	0xa208,
	0xa20c,
	0x8d58,
	0x9030,
	0x9034,
	0x9038,
	0x903c,
	0x9040,
	0x9654,
	0x897c,
	0xa210,
	0xa214,
	0x9868,
	0xa02c,
	0x9664,
	0x9698,
	0x949c,
	0x8e10,
	0x8e18,
	0x8c50,
	0x8c58,
	0x8c60,
	0x8c68,
	0x89b4,
	0x9830,
	0x802c,
};
163
164 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
165 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
166 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
167 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
168 extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
169 extern void evergreen_mc_program(struct radeon_device *rdev);
170 extern void evergreen_irq_suspend(struct radeon_device *rdev);
171 extern int evergreen_mc_init(struct radeon_device *rdev);
172 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
173 extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
174 extern void evergreen_program_aspm(struct radeon_device *rdev);
175 extern void sumo_rlc_fini(struct radeon_device *rdev);
176 extern int sumo_rlc_init(struct radeon_device *rdev);
177
178 /* Firmware Names */
179 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
180 MODULE_FIRMWARE("radeon/BARTS_me.bin");
181 MODULE_FIRMWARE("radeon/BARTS_mc.bin");
182 MODULE_FIRMWARE("radeon/BARTS_smc.bin");
183 MODULE_FIRMWARE("radeon/BTC_rlc.bin");
184 MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
185 MODULE_FIRMWARE("radeon/TURKS_me.bin");
186 MODULE_FIRMWARE("radeon/TURKS_mc.bin");
187 MODULE_FIRMWARE("radeon/TURKS_smc.bin");
188 MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
189 MODULE_FIRMWARE("radeon/CAICOS_me.bin");
190 MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
191 MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
192 MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
193 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
194 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
195 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
196 MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
197 MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
198 MODULE_FIRMWARE("radeon/ARUBA_me.bin");
199 MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
200
201
/* Golden register fixups for CAYMAN, second pass.  Entries appear to be
 * {offset, mask, value} triplets applied by
 * radeon_program_register_sequence() -- format defined there, confirm. */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
211
/* Golden register fixups for CAYMAN, programmed at init by
 * ni_init_golden_registers().  Apparently {offset, mask, value}
 * triplets consumed by radeon_program_register_sequence(). */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
252
/* Golden register fixups shared by both ARUBA variants (applied after
 * either dvst_golden_registers or scrapper_golden_registers in
 * ni_init_golden_registers()).  Same apparent {offset, mask, value}
 * triplet format as the other golden tables. */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
260
/* Golden register fixups for the ARUBA device IDs matched in
 * ni_init_golden_registers() (0x9900..0x9918 subset).  Apparent
 * {offset, mask, value} triplets for
 * radeon_program_register_sequence(). */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
318
/* Golden register fixups for the remaining ARUBA device IDs (the
 * "else" branch of ni_init_golden_registers()).  Largely the dvst
 * table with most entries repeated twice -- presumably deliberate
 * (e.g. one write per instance), TODO confirm against hw docs before
 * de-duplicating.  Apparent {offset, mask, value} triplets. */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
425
426 static void ni_init_golden_registers(struct radeon_device *rdev)
427 {
428         switch (rdev->family) {
429         case CHIP_CAYMAN:
430                 radeon_program_register_sequence(rdev,
431                                                  cayman_golden_registers,
432                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
433                 radeon_program_register_sequence(rdev,
434                                                  cayman_golden_registers2,
435                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
436                 break;
437         case CHIP_ARUBA:
438                 if ((rdev->pdev->device == 0x9900) ||
439                     (rdev->pdev->device == 0x9901) ||
440                     (rdev->pdev->device == 0x9903) ||
441                     (rdev->pdev->device == 0x9904) ||
442                     (rdev->pdev->device == 0x9905) ||
443                     (rdev->pdev->device == 0x9906) ||
444                     (rdev->pdev->device == 0x9907) ||
445                     (rdev->pdev->device == 0x9908) ||
446                     (rdev->pdev->device == 0x9909) ||
447                     (rdev->pdev->device == 0x990A) ||
448                     (rdev->pdev->device == 0x990B) ||
449                     (rdev->pdev->device == 0x990C) ||
450                     (rdev->pdev->device == 0x990D) ||
451                     (rdev->pdev->device == 0x990E) ||
452                     (rdev->pdev->device == 0x990F) ||
453                     (rdev->pdev->device == 0x9910) ||
454                     (rdev->pdev->device == 0x9913) ||
455                     (rdev->pdev->device == 0x9917) ||
456                     (rdev->pdev->device == 0x9918)) {
457                         radeon_program_register_sequence(rdev,
458                                                          dvst_golden_registers,
459                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
460                         radeon_program_register_sequence(rdev,
461                                                          dvst_golden_registers2,
462                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
463                 } else {
464                         radeon_program_register_sequence(rdev,
465                                                          scrapper_golden_registers,
466                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
467                         radeon_program_register_sequence(rdev,
468                                                          dvst_golden_registers2,
469                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
470                 }
471                 break;
472         default:
473                 break;
474         }
475 }
476
477 #define BTC_IO_MC_REGS_SIZE 29
478
/* BARTS MC io register setup: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs written by ni_mc_load_microcode() before loading the MC ucode. */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
510
/* TURKS MC io register setup: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, written by ni_mc_load_microcode().
 * Identical to the BARTS table except the final (0x9f) entry. */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
542
/* CAICOS MC io register setup: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, written by ni_mc_load_microcode().
 * Identical to the BARTS table except the final (0x9f) entry. */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
574
/* CAYMAN MC io register setup: {index, data} pairs for
 * MC_SEQ_IO_DEBUG_INDEX/DATA, written by ni_mc_load_microcode().
 * Identical to the BARTS table except the final (0x9f) entry. */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
606
607 int ni_mc_load_microcode(struct radeon_device *rdev)
608 {
609         const __be32 *fw_data;
610         u32 mem_type, running, blackout = 0;
611         u32 *io_mc_regs;
612         int i, ucode_size, regs_size;
613
614         if (!rdev->mc_fw)
615                 return -EINVAL;
616
617         switch (rdev->family) {
618         case CHIP_BARTS:
619                 io_mc_regs = (u32 *)&barts_io_mc_regs;
620                 ucode_size = BTC_MC_UCODE_SIZE;
621                 regs_size = BTC_IO_MC_REGS_SIZE;
622                 break;
623         case CHIP_TURKS:
624                 io_mc_regs = (u32 *)&turks_io_mc_regs;
625                 ucode_size = BTC_MC_UCODE_SIZE;
626                 regs_size = BTC_IO_MC_REGS_SIZE;
627                 break;
628         case CHIP_CAICOS:
629         default:
630                 io_mc_regs = (u32 *)&caicos_io_mc_regs;
631                 ucode_size = BTC_MC_UCODE_SIZE;
632                 regs_size = BTC_IO_MC_REGS_SIZE;
633                 break;
634         case CHIP_CAYMAN:
635                 io_mc_regs = (u32 *)&cayman_io_mc_regs;
636                 ucode_size = CAYMAN_MC_UCODE_SIZE;
637                 regs_size = BTC_IO_MC_REGS_SIZE;
638                 break;
639         }
640
641         mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
642         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
643
644         if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
645                 if (running) {
646                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
647                         WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
648                 }
649
650                 /* reset the engine and set to writable */
651                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
652                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
653
654                 /* load mc io regs */
655                 for (i = 0; i < regs_size; i++) {
656                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
657                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
658                 }
659                 /* load the MC ucode */
660                 fw_data = (const __be32 *)rdev->mc_fw->data;
661                 for (i = 0; i < ucode_size; i++)
662                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
663
664                 /* put the engine back into the active state */
665                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
666                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
667                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
668
669                 /* wait for training to complete */
670                 for (i = 0; i < rdev->usec_timeout; i++) {
671                         if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
672                                 break;
673                         udelay(1);
674                 }
675
676                 if (running)
677                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
678         }
679
680         return 0;
681 }
682
683 int ni_init_microcode(struct radeon_device *rdev)
684 {
685         const char *chip_name;
686         const char *rlc_chip_name;
687         size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
688         size_t smc_req_size = 0;
689         char fw_name[30];
690         int err;
691
692         DRM_DEBUG("\n");
693
694         switch (rdev->family) {
695         case CHIP_BARTS:
696                 chip_name = "BARTS";
697                 rlc_chip_name = "BTC";
698                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
699                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
700                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
701                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
702                 smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
703                 break;
704         case CHIP_TURKS:
705                 chip_name = "TURKS";
706                 rlc_chip_name = "BTC";
707                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
708                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
709                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
710                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
711                 smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
712                 break;
713         case CHIP_CAICOS:
714                 chip_name = "CAICOS";
715                 rlc_chip_name = "BTC";
716                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
717                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
718                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
719                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
720                 smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
721                 break;
722         case CHIP_CAYMAN:
723                 chip_name = "CAYMAN";
724                 rlc_chip_name = "CAYMAN";
725                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
726                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
727                 rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
728                 mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
729                 smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
730                 break;
731         case CHIP_ARUBA:
732                 chip_name = "ARUBA";
733                 rlc_chip_name = "ARUBA";
734                 /* pfp/me same size as CAYMAN */
735                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
736                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
737                 rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
738                 mc_req_size = 0;
739                 break;
740         default: BUG();
741         }
742
743         DRM_INFO("Loading %s Microcode\n", chip_name);
744
745         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
746         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
747         if (err)
748                 goto out;
749         if (rdev->pfp_fw->size != pfp_req_size) {
750                 printk(KERN_ERR
751                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
752                        rdev->pfp_fw->size, fw_name);
753                 err = -EINVAL;
754                 goto out;
755         }
756
757         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
758         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
759         if (err)
760                 goto out;
761         if (rdev->me_fw->size != me_req_size) {
762                 printk(KERN_ERR
763                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
764                        rdev->me_fw->size, fw_name);
765                 err = -EINVAL;
766         }
767
768         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
769         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
770         if (err)
771                 goto out;
772         if (rdev->rlc_fw->size != rlc_req_size) {
773                 printk(KERN_ERR
774                        "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
775                        rdev->rlc_fw->size, fw_name);
776                 err = -EINVAL;
777         }
778
779         /* no MC ucode on TN */
780         if (!(rdev->flags & RADEON_IS_IGP)) {
781                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
782                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
783                 if (err)
784                         goto out;
785                 if (rdev->mc_fw->size != mc_req_size) {
786                         printk(KERN_ERR
787                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
788                                rdev->mc_fw->size, fw_name);
789                         err = -EINVAL;
790                 }
791         }
792
793         if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
794                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
795                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
796                 if (err) {
797                         printk(KERN_ERR
798                                "smc: error loading firmware \"%s\"\n",
799                                fw_name);
800                         release_firmware(rdev->smc_fw);
801                         rdev->smc_fw = NULL;
802                 } else if (rdev->smc_fw->size != smc_req_size) {
803                         printk(KERN_ERR
804                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
805                                rdev->mc_fw->size, fw_name);
806                         err = -EINVAL;
807                 }
808         }
809
810 out:
811         if (err) {
812                 if (err != -EINVAL)
813                         printk(KERN_ERR
814                                "ni_cp: Failed to load firmware \"%s\"\n",
815                                fw_name);
816                 release_firmware(rdev->pfp_fw);
817                 rdev->pfp_fw = NULL;
818                 release_firmware(rdev->me_fw);
819                 rdev->me_fw = NULL;
820                 release_firmware(rdev->rlc_fw);
821                 rdev->rlc_fw = NULL;
822                 release_firmware(rdev->mc_fw);
823                 rdev->mc_fw = NULL;
824         }
825         return err;
826 }
827
828 int tn_get_temp(struct radeon_device *rdev)
829 {
830         u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
831         int actual_temp = (temp / 8) - 49;
832
833         return actual_temp * 1000;
834 }
835
836 /*
837  * Core functions
838  */
/* Configure the graphics engine on Cayman/Aruba (TN) ASICs: fill in
 * rdev->config.cayman from the family / PCI device id, program HDP,
 * tiling and render-backend setup, disable unused texture channel
 * caches, and apply the HW defaults for the 3D engine.
 */
static void cayman_gpu_init(struct radeon_device *rdev)
{
        u32 gb_addr_config = 0;
        u32 mc_shared_chmap, mc_arb_ramcfg;
        u32 cgts_tcc_disable;
        u32 sx_debug_1;
        u32 smx_dc_ctl0;
        u32 cgts_sm_ctrl_reg;
        u32 hdp_host_path_cntl;
        u32 tmp;
        u32 disabled_rb_mask;
        int i, j;

        /* per-family shader core and FIFO size limits */
        switch (rdev->family) {
        case CHIP_CAYMAN:
                rdev->config.cayman.max_shader_engines = 2;
                rdev->config.cayman.max_pipes_per_simd = 4;
                rdev->config.cayman.max_tile_pipes = 8;
                rdev->config.cayman.max_simds_per_se = 12;
                rdev->config.cayman.max_backends_per_se = 4;
                rdev->config.cayman.max_texture_channel_caches = 8;
                rdev->config.cayman.max_gprs = 256;
                rdev->config.cayman.max_threads = 256;
                rdev->config.cayman.max_gs_threads = 32;
                rdev->config.cayman.max_stack_entries = 512;
                rdev->config.cayman.sx_num_of_sets = 8;
                rdev->config.cayman.sx_max_export_size = 256;
                rdev->config.cayman.sx_max_export_pos_size = 64;
                rdev->config.cayman.sx_max_export_smx_size = 192;
                rdev->config.cayman.max_hw_contexts = 8;
                rdev->config.cayman.sq_num_cf_insts = 2;

                rdev->config.cayman.sc_prim_fifo_size = 0x100;
                rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
                break;
        case CHIP_ARUBA:
        default:
                rdev->config.cayman.max_shader_engines = 1;
                rdev->config.cayman.max_pipes_per_simd = 4;
                rdev->config.cayman.max_tile_pipes = 2;
                /* TN SKUs differ in SIMD/backend count; keyed off the
                 * PCI device id.
                 */
                if ((rdev->pdev->device == 0x9900) ||
                    (rdev->pdev->device == 0x9901) ||
                    (rdev->pdev->device == 0x9905) ||
                    (rdev->pdev->device == 0x9906) ||
                    (rdev->pdev->device == 0x9907) ||
                    (rdev->pdev->device == 0x9908) ||
                    (rdev->pdev->device == 0x9909) ||
                    (rdev->pdev->device == 0x990B) ||
                    (rdev->pdev->device == 0x990C) ||
                    (rdev->pdev->device == 0x990F) ||
                    (rdev->pdev->device == 0x9910) ||
                    (rdev->pdev->device == 0x9917) ||
                    (rdev->pdev->device == 0x9999) ||
                    (rdev->pdev->device == 0x999C)) {
                        rdev->config.cayman.max_simds_per_se = 6;
                        rdev->config.cayman.max_backends_per_se = 2;
                } else if ((rdev->pdev->device == 0x9903) ||
                           (rdev->pdev->device == 0x9904) ||
                           (rdev->pdev->device == 0x990A) ||
                           (rdev->pdev->device == 0x990D) ||
                           (rdev->pdev->device == 0x990E) ||
                           (rdev->pdev->device == 0x9913) ||
                           (rdev->pdev->device == 0x9918) ||
                           (rdev->pdev->device == 0x999D)) {
                        rdev->config.cayman.max_simds_per_se = 4;
                        rdev->config.cayman.max_backends_per_se = 2;
                } else if ((rdev->pdev->device == 0x9919) ||
                           (rdev->pdev->device == 0x9990) ||
                           (rdev->pdev->device == 0x9991) ||
                           (rdev->pdev->device == 0x9994) ||
                           (rdev->pdev->device == 0x9995) ||
                           (rdev->pdev->device == 0x9996) ||
                           (rdev->pdev->device == 0x999A) ||
                           (rdev->pdev->device == 0x99A0)) {
                        rdev->config.cayman.max_simds_per_se = 3;
                        rdev->config.cayman.max_backends_per_se = 1;
                } else {
                        rdev->config.cayman.max_simds_per_se = 2;
                        rdev->config.cayman.max_backends_per_se = 1;
                }
                rdev->config.cayman.max_texture_channel_caches = 2;
                rdev->config.cayman.max_gprs = 256;
                rdev->config.cayman.max_threads = 256;
                rdev->config.cayman.max_gs_threads = 32;
                rdev->config.cayman.max_stack_entries = 512;
                rdev->config.cayman.sx_num_of_sets = 8;
                rdev->config.cayman.sx_max_export_size = 256;
                rdev->config.cayman.sx_max_export_pos_size = 64;
                rdev->config.cayman.sx_max_export_smx_size = 192;
                rdev->config.cayman.max_hw_contexts = 8;
                rdev->config.cayman.sq_num_cf_insts = 2;

                rdev->config.cayman.sc_prim_fifo_size = 0x40;
                rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
                rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
                gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
                break;
        }

        /* Initialize HDP */
        for (i = 0, j = 0; i < 32; i++, j += 0x18) {
                WREG32((0x2c14 + j), 0x00000000);
                WREG32((0x2c18 + j), 0x00000000);
                WREG32((0x2c1c + j), 0x00000000);
                WREG32((0x2c20 + j), 0x00000000);
                WREG32((0x2c24 + j), 0x00000000);
        }

        WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

        evergreen_fix_pci_max_read_req_size(rdev);

        /* NOTE(review): mc_shared_chmap is read but not consumed below */
        mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
        mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

        /* derive the memory row size (kb) from the MC arbiter config,
         * capped at 4
         */
        tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
        rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (rdev->config.cayman.mem_row_size_in_kb > 4)
                rdev->config.cayman.mem_row_size_in_kb = 4;
        /* XXX use MC settings? */
        rdev->config.cayman.shader_engine_tile_size = 32;
        rdev->config.cayman.num_gpus = 1;
        rdev->config.cayman.multi_gpu_tile_size = 64;

        /* decode the golden gb_addr_config into the cayman config fields;
         * these overwrite the defaults set just above
         */
        tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
        rdev->config.cayman.num_tile_pipes = (1 << tmp);
        tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
        rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
        tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
        rdev->config.cayman.num_shader_engines = tmp + 1;
        tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
        rdev->config.cayman.num_gpus = tmp + 1;
        tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
        rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
        tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
        rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;


        /* setup tiling info dword.  gb_addr_config is not adequate since it does
         * not have bank info, so create a custom tiling dword.
         * bits 3:0   num_pipes
         * bits 7:4   num_banks
         * bits 11:8  group_size
         * bits 15:12 row_size
         */
        rdev->config.cayman.tile_config = 0;
        switch (rdev->config.cayman.num_tile_pipes) {
        case 1:
        default:
                rdev->config.cayman.tile_config |= (0 << 0);
                break;
        case 2:
                rdev->config.cayman.tile_config |= (1 << 0);
                break;
        case 4:
                rdev->config.cayman.tile_config |= (2 << 0);
                break;
        case 8:
                rdev->config.cayman.tile_config |= (3 << 0);
                break;
        }

        /* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
        if (rdev->flags & RADEON_IS_IGP)
                rdev->config.cayman.tile_config |= 1 << 4;
        else {
                switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
                case 0: /* four banks */
                        rdev->config.cayman.tile_config |= 0 << 4;
                        break;
                case 1: /* eight banks */
                        rdev->config.cayman.tile_config |= 1 << 4;
                        break;
                case 2: /* sixteen banks */
                default:
                        rdev->config.cayman.tile_config |= 2 << 4;
                        break;
                }
        }
        rdev->config.cayman.tile_config |=
                ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
        rdev->config.cayman.tile_config |=
                ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

        /* gather the per-SE render backend disable bits, 4 bits per SE,
         * highest SE in the lowest nibble
         */
        tmp = 0;
        for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
                u32 rb_disable_bitmap;

                WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
                WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
                rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
                tmp <<= 4;
                tmp |= rb_disable_bitmap;
        }
        /* enabled rb are just the one not disabled :) */
        disabled_rb_mask = tmp;
        tmp = 0;
        for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
                tmp |= (1 << i);
        /* if all the backends are disabled, fix it up here */
        if ((disabled_rb_mask & tmp) == tmp) {
                for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
                        disabled_rb_mask &= ~(1 << i);
        }

        /* restore broadcast addressing after the per-SE reads above */
        WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
        WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

        /* propagate the address config to all blocks that need it,
         * including the UVD decoder blocks
         */
        WREG32(GB_ADDR_CONFIG, gb_addr_config);
        WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
        if (ASIC_IS_DCE6(rdev))
                WREG32(DMIF_ADDR_CALC, gb_addr_config);
        WREG32(HDP_ADDR_CONFIG, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
        WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
        WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
        WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

        /* build the backend map; single-backend IGP parts are handled
         * directly, everything else goes through the remap helper
         */
        if ((rdev->config.cayman.max_backends_per_se == 1) &&
            (rdev->flags & RADEON_IS_IGP)) {
                if ((disabled_rb_mask & 3) == 1) {
                        /* RB0 disabled, RB1 enabled */
                        tmp = 0x11111111;
                } else {
                        /* RB1 disabled, RB0 enabled */
                        tmp = 0x00000000;
                }
        } else {
                tmp = gb_addr_config & NUM_PIPES_MASK;
                tmp = r6xx_remap_render_backend(rdev, tmp,
                                                rdev->config.cayman.max_backends_per_se *
                                                rdev->config.cayman.max_shader_engines,
                                                CAYMAN_MAX_BACKENDS, disabled_rb_mask);
        }
        WREG32(GB_BACKEND_MAP, tmp);

        /* enable only the texture channel caches this part actually has
         * (disable bits live in the upper half of the register)
         */
        cgts_tcc_disable = 0xffff0000;
        for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
                cgts_tcc_disable &= ~(1 << (16 + i));
        WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
        WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
        WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
        WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

        /* reprogram the shader complex */
        cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
        for (i = 0; i < 16; i++)
                WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
        WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);

        /* set HW defaults for 3D engine */
        WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

        sx_debug_1 = RREG32(SX_DEBUG_1);
        sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
        WREG32(SX_DEBUG_1, sx_debug_1);

        smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
        smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
        smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
        WREG32(SMX_DC_CTL0, smx_dc_ctl0);

        WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

        /* need to be explicitly zero-ed */
        WREG32(VGT_OFFCHIP_LDS_BASE, 0);
        WREG32(SQ_LSTMP_RING_BASE, 0);
        WREG32(SQ_HSTMP_RING_BASE, 0);
        WREG32(SQ_ESTMP_RING_BASE, 0);
        WREG32(SQ_GSTMP_RING_BASE, 0);
        WREG32(SQ_VSTMP_RING_BASE, 0);
        WREG32(SQ_PSTMP_RING_BASE, 0);

        WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

        /* export buffer sizes are programmed in units of 4 dwords */
        WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
                                        POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
                                        SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

        WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
                                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
                                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));


        WREG32(VGT_NUM_INSTANCES, 1);

        WREG32(CP_PERFMON_CNTL, 0);

        WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
                                  FETCH_FIFO_HIWATER(0x4) |
                                  DONE_FIFO_HIWATER(0xe0) |
                                  ALU_UPDATE_FIFO_HIWATER(0x8)));

        WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
        WREG32(SQ_CONFIG, (VC_ENABLE |
                           EXPORT_SRC_C |
                           GFX_PRIO(0) |
                           CS1_PRIO(0) |
                           CS2_PRIO(1)));
        WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

        WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                                          FORCE_EOV_MAX_REZ_CNT(255)));

        WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
               AUTO_INVLD_EN(ES_AND_GS_AUTO));

        WREG32(VGT_GS_VERTEX_REUSE, 16);
        WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

        /* clear the CB performance counter selects */
        WREG32(CB_PERF_CTR0_SEL_0, 0);
        WREG32(CB_PERF_CTR0_SEL_1, 0);
        WREG32(CB_PERF_CTR1_SEL_0, 0);
        WREG32(CB_PERF_CTR1_SEL_1, 0);
        WREG32(CB_PERF_CTR2_SEL_0, 0);
        WREG32(CB_PERF_CTR2_SEL_1, 0);
        WREG32(CB_PERF_CTR3_SEL_0, 0);
        WREG32(CB_PERF_CTR3_SEL_1, 0);

        tmp = RREG32(HDP_MISC_CNTL);
        tmp |= HDP_FLUSH_INVALIDATE_CACHE;
        WREG32(HDP_MISC_CNTL, tmp);

        hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
        WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

        WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

        udelay(50);

        /* set clockgating golden values on TN */
        if (rdev->family == CHIP_ARUBA) {
                tmp = RREG32_CG(CG_CGTT_LOCAL_0);
                tmp &= ~0x00380000;
                WREG32_CG(CG_CGTT_LOCAL_0, tmp);
                tmp = RREG32_CG(CG_CGTT_LOCAL_1);
                tmp &= ~0x0e000000;
                WREG32_CG(CG_CGTT_LOCAL_1, tmp);
        }
}
1182
1183 /*
1184  * GART
1185  */
/* Flush the HDP cache and invalidate the TLBs of all eight VM contexts. */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
        /* flush hdp cache */
        WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

        /* bits 0-7 are the VM contexts0-7 */
        WREG32(VM_INVALIDATE_REQUEST, 1);
}
1194
/* Program the L1 TLB / L2 cache and the VM context page tables, then
 * enable the PCIE GART.  Context 0 covers the GTT aperture; contexts
 * 1-7 are pointed at the GART table as a safe default and are assigned
 * to VMs on the fly in radeon_gart.c.
 * Returns 0 on success or a negative error code (-EINVAL if the GART
 * table object is missing, or the error from pinning it).
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
        int i, r;

        if (rdev->gart.robj == NULL) {
                dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
                return -EINVAL;
        }
        r = radeon_gart_table_vram_pin(rdev);
        if (r)
                return r;
        radeon_gart_restore(rdev);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL,
               (0xA << 7) |
               ENABLE_L1_TLB |
               ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               ENABLE_ADVANCED_DRIVER_MODEL |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
               ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        /* setup context0: maps the GTT range, faults fall back to the
         * dummy page
         */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT0_CNTL2, 0);
        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

        /* NOTE(review): 0x15D4-0x15DC are written raw with no symbolic
         * names here — presumably VM-related scratch; confirm against
         * the register spec before touching
         */
        WREG32(0x15D4, 0);
        WREG32(0x15D8, 0);
        WREG32(0x15DC, 0);

        /* empty context1-7 */
        /* Assign the pt base to something valid for now; the pts used for
         * the VMs are determined by the application and setup and assigned
         * on the fly in the vm part of radeon_gart.c
         */
        for (i = 1; i < 8; i++) {
                WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
                WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
                WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
                        rdev->gart.table_addr >> 12);
        }

        /* enable context1-7 with full fault interrupt/default handling */
        WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
               (u32)(rdev->dummy_page.addr >> 12));
        WREG32(VM_CONTEXT1_CNTL2, 4);
        WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
                                RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
                                PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
                                VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
                                READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                READ_PROTECTION_FAULT_ENABLE_DEFAULT |
                                WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
                                WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

        cayman_pcie_gart_tlb_flush(rdev);
        DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
                 (unsigned)(rdev->mc.gtt_size >> 20),
                 (unsigned long long)rdev->gart.table_addr);
        rdev->gart.ready = true;
        return 0;
}
1275
/* Disable all VM contexts, drop the L1 TLB / L2 cache enables set by
 * cayman_pcie_gart_enable(), and unpin the GART table.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
        /* Disable all tables */
        WREG32(VM_CONTEXT0_CNTL, 0);
        WREG32(VM_CONTEXT1_CNTL, 0);
        /* Setup TLB control */
        WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
               SYSTEM_ACCESS_MODE_NOT_IN_SYS |
               SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
        /* Setup L2 cache */
        WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
               ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
               EFFECTIVE_L2_QUEUE_SIZE(7) |
               CONTEXT1_IDENTITY_ACCESS_MODE(1));
        WREG32(VM_L2_CNTL2, 0);
        WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        radeon_gart_table_vram_unpin(rdev);
}
1295
/* Tear down the PCIE GART: disable the hardware first, then free the
 * table memory and the gart bookkeeping.  Order matters.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
        cayman_pcie_gart_disable(rdev);
        radeon_gart_table_vram_free(rdev);
        radeon_gart_fini(rdev);
}
1302
1303 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
1304                               int ring, u32 cp_int_cntl)
1305 {
1306         u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
1307
1308         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
1309         WREG32(CP_INT_CNTL, cp_int_cntl);
1310 }
1311
1312 /*
1313  * CP.
1314  */
/* Emit a fence on a CP ring: flush the read caches for the gart, then an
 * EVENT_WRITE_EOP that writes fence->seq to the fence driver address and
 * raises an interrupt.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
                            struct radeon_fence *fence)
{
        struct radeon_ring *ring = &rdev->ring[fence->ring];
        u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF); /* sync full address range */
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
        /* EVENT_WRITE_EOP - flush caches, send int */
        radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
        radeon_ring_write(ring, addr & 0xffffffff);
        /* DATA_SEL(1) = write the 32-bit seq, INT_SEL(2) = interrupt on write */
        radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
        radeon_ring_write(ring, fence->seq);
        radeon_ring_write(ring, 0);
}
1338
/* Schedule an indirect buffer on a CP ring: switch to DX10/11 mode,
 * optionally save the next rptr, emit the INDIRECT_BUFFER packet for
 * the IB (tagged with its vm id), then flush the read caches for that
 * vmid.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
        struct radeon_ring *ring = &rdev->ring[ib->ring];

        /* set to DX10/11 mode */
        radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(ring, 1);

        if (ring->rptr_save_reg) {
                /* 3 + 4 + 8 dwords = this write + the IB packet + the
                 * cache flush emitted below
                 */
                uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
                radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
                radeon_ring_write(ring, ((ring->rptr_save_reg - 
                                          PACKET3_SET_CONFIG_REG_START) >> 2));
                radeon_ring_write(ring, next_rptr);
        }

        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
                          (2 << 0) |
#endif
                          (ib->gpu_addr & 0xFFFFFFFC));
        radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
        /* length in dwords, vm id in bits 31:24 (0 when no vm) */
        radeon_ring_write(ring, ib->length_dw | 
                          (ib->vm ? (ib->vm->id << 24) : 0));

        /* flush read cache over gart for this vmid */
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
        radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
        radeon_ring_write(ring, 0xFFFFFFFF);
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 10); /* poll interval */
}
1375
1376 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1377 {
1378         if (enable)
1379                 WREG32(CP_ME_CNTL, 0);
1380         else {
1381                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1382                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1383                 WREG32(SCRATCH_UMSK, 0);
1384                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1385         }
1386 }
1387
/* Upload the PFP and ME microcode (big-endian dwords from the firmware
 * blobs) into the CP with the CP halted.  Returns -EINVAL if either
 * firmware image has not been loaded yet, 0 on success.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
        const __be32 *fw_data;
        int i;

        if (!rdev->me_fw || !rdev->pfp_fw)
                return -EINVAL;

        /* halt the CP before touching its ucode RAM */
        cayman_cp_enable(rdev, false);

        fw_data = (const __be32 *)rdev->pfp_fw->data;
        WREG32(CP_PFP_UCODE_ADDR, 0);
        for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
                WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
        WREG32(CP_PFP_UCODE_ADDR, 0);

        fw_data = (const __be32 *)rdev->me_fw->data;
        WREG32(CP_ME_RAM_WADDR, 0);
        for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
                WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

        /* reset the read/write pointers after the upload */
        WREG32(CP_PFP_UCODE_ADDR, 0);
        WREG32(CP_ME_RAM_WADDR, 0);
        WREG32(CP_ME_RAM_RADDR, 0);
        return 0;
}
1414
/* Start the GFX CP: emit ME_INITIALIZE, enable the CP, then emit the
 * clear-state preamble (cayman_default_state) and initial register
 * values on the GFX ring.  Returns 0 on success or the ring-lock error.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        int r, i;

        r = radeon_ring_lock(rdev, ring, 7);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }
        radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
        radeon_ring_write(ring, 0x1);
        radeon_ring_write(ring, 0x0);
        radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
        radeon_ring_unlock_commit(rdev, ring);

        cayman_cp_enable(rdev, true);

        /* cayman_default_size state dwords plus 19 dwords of packets below */
        r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
        if (r) {
                DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
                return r;
        }

        /* setup clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        for (i = 0; i < cayman_default_size; i++)
                radeon_ring_write(ring, cayman_default_state[i]);

        radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

        /* set clear context state */
        radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(ring, 0);

        /* SQ_VTX_BASE_VTX_LOC */
        radeon_ring_write(ring, 0xc0026f00);
        radeon_ring_write(ring, 0x00000000);
        radeon_ring_write(ring, 0x00000000);
        radeon_ring_write(ring, 0x00000000);

        /* Clear consts */
        radeon_ring_write(ring, 0xc0036f00);
        radeon_ring_write(ring, 0x00000bc4);
        radeon_ring_write(ring, 0xffffffff);
        radeon_ring_write(ring, 0xffffffff);
        radeon_ring_write(ring, 0xffffffff);

        radeon_ring_write(ring, 0xc0026900);
        radeon_ring_write(ring, 0x00000316);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /*  */

        radeon_ring_unlock_commit(rdev, ring);

        /* XXX init other rings */

        return 0;
}
1480
/* Tear down the GFX CP: halt it, release the ring, and free the scratch
 * register used to save the read pointer.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        cayman_cp_enable(rdev, false);
        radeon_ring_fini(rdev, ring);
        radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1488
/*
 * cayman_cp_resume - soft reset the CP and (re)program all three CP rings
 *
 * Resets the CP block (plus the front-end blocks that must be reset with
 * it), programs ring buffer size/control, rptr writeback address and base
 * for rings 0/1/2 from per-ring register tables, zeroes the read/write
 * pointers, then starts the rings via cayman_cp_start().
 * Returns 0 on success, or the error from the ring test on ring 0.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* Per-ring register tables so rings 0/1/2 can be programmed in a
	 * loop; all four tables are indexed the same way as ridx[]. */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET); /* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = drm_order(ring->ring_size / 8);
		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		/* temporarily allow the driver to write RPTR directly */
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only tests cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1597
1598 /*
1599  * DMA
1600  * Starting with R600, the GPU has an asynchronous
1601  * DMA engine.  The programming model is very similar
1602  * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
1605  * It supports copying data, writing embedded data,
1606  * solid fills, and a number of other things.  It also
1607  * has support for tiling/detiling of buffers.
1608  * Cayman and newer support two asynchronous DMA engines.
1609  */
1610 /**
1611  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1612  *
1613  * @rdev: radeon_device pointer
1614  * @ib: IB object to schedule
1615  *
1616  * Schedule an IB in the DMA ring (cayman-SI).
1617  */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		/* Predict the wptr value after this function has emitted
		 * everything: 4 dwords for the WRITE packet below, NOP
		 * padding up to the same (8n+5) alignment used before the
		 * IB packet, plus the 3 dwords of the IB packet itself. */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		/* emit a WRITE packet storing that value into the
		 * next-rptr writeback slot */
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); /* IB base, low bits masked */
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}
1644
1645 /**
1646  * cayman_dma_stop - stop the async dma engines
1647  *
1648  * @rdev: radeon_device pointer
1649  *
1650  * Stop the async dma engines (cayman-SI).
1651  */
1652 void cayman_dma_stop(struct radeon_device *rdev)
1653 {
1654         u32 rb_cntl;
1655
1656         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1657
1658         /* dma0 */
1659         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1660         rb_cntl &= ~DMA_RB_ENABLE;
1661         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1662
1663         /* dma1 */
1664         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1665         rb_cntl &= ~DMA_RB_ENABLE;
1666         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1667
1668         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1669         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1670 }
1671
1672 /**
1673  * cayman_dma_resume - setup and start the async dma engines
1674  *
1675  * @rdev: radeon_device pointer
1676  *
1677  * Set up the DMA ring buffers and enable them. (cayman-SI).
1678  * Returns 0 for success, error for failure.
1679  */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET); /* read back to post the write */
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	/* program both engines: i == 0 -> dma0, i == 1 -> dma1 */
	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		/* finally turn the ring on (writeback bit included if set) */
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
1762
1763 /**
1764  * cayman_dma_fini - tear down the async dma engines
1765  *
1766  * @rdev: radeon_device pointer
1767  *
1768  * Stop the async dma engines and free the rings (cayman-SI).
1769  */
1770 void cayman_dma_fini(struct radeon_device *rdev)
1771 {
1772         cayman_dma_stop(rdev);
1773         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1774         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1775 }
1776
/*
 * cayman_gpu_check_soft_reset - build a reset mask from the status regs
 *
 * Reads GRBM/DMA/SRBM/VM_L2 status registers and sets a RADEON_RESET_*
 * bit for each block that reports busy or has a pending request.  A busy
 * MC is deliberately dropped from the mask (see the comment at the end).
 * Returns the mask; 0 means the GPU looks idle.
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1854
/*
 * cayman_gpu_soft_reset - soft reset the blocks selected by reset_mask
 *
 * Dumps the GPU status registers, halts the CP and any DMA engine being
 * reset, stops the MC, then pulses GRBM_SOFT_RESET/SRBM_SOFT_RESET with
 * the bits derived from @reset_mask and restores the MC.  A zero mask is
 * a no-op.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* stop the MC before resetting; restored at the end */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* no MC soft reset on IGP parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET); /* post the write */

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET); /* post the write */

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1986
1987 int cayman_asic_reset(struct radeon_device *rdev)
1988 {
1989         u32 reset_mask;
1990
1991         reset_mask = cayman_gpu_check_soft_reset(rdev);
1992
1993         if (reset_mask)
1994                 r600_set_bios_scratch_engine_hung(rdev, true);
1995
1996         cayman_gpu_soft_reset(rdev, reset_mask);
1997
1998         reset_mask = cayman_gpu_check_soft_reset(rdev);
1999
2000         if (!reset_mask)
2001                 r600_set_bios_scratch_engine_hung(rdev, false);
2002
2003         return 0;
2004 }
2005
2006 /**
2007  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
2008  *
2009  * @rdev: radeon_device pointer
2010  * @ring: radeon_ring structure holding ring information
2011  *
2012  * Check if the GFX engine is locked up.
2013  * Returns true if the engine appears to be locked up, false if not.
2014  */
2015 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2016 {
2017         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2018
2019         if (!(reset_mask & (RADEON_RESET_GFX |
2020                             RADEON_RESET_COMPUTE |
2021                             RADEON_RESET_CP))) {
2022                 radeon_ring_lockup_update(ring);
2023                 return false;
2024         }
2025         /* force CP activities */
2026         radeon_ring_force_activity(rdev, ring);
2027         return radeon_ring_test_lockup(rdev, ring);
2028 }
2029
2030 /**
2031  * cayman_dma_is_lockup - Check if the DMA engine is locked up
2032  *
2033  * @rdev: radeon_device pointer
2034  * @ring: radeon_ring structure holding ring information
2035  *
2036  * Check if the async DMA engine is locked up.
2037  * Returns true if the engine appears to be locked up, false if not.
2038  */
2039 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2040 {
2041         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2042         u32 mask;
2043
2044         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2045                 mask = RADEON_RESET_DMA;
2046         else
2047                 mask = RADEON_RESET_DMA1;
2048
2049         if (!(reset_mask & mask)) {
2050                 radeon_ring_lockup_update(ring);
2051                 return false;
2052         }
2053         /* force ring activities */
2054         radeon_ring_force_activity(rdev, ring);
2055         return radeon_ring_test_lockup(rdev, ring);
2056 }
2057
/*
 * cayman_startup - bring up the ASIC
 *
 * Loads microcode, enables the GART, initializes the GPU, RLC (IGP only),
 * writeback, fences, interrupts, the CP/DMA/UVD rings, the IB pool, the
 * VM manager and audio — in that order.  Shared by init and resume.
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);
	/* enable aspm */
	evergreen_program_aspm(rdev);

	evergreen_mc_program(rdev);

	/* IGP parts have no MC firmware; discrete parts need it too */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
		rdev->rlc.cs_data = cayman_cs_data;
		r = sumo_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: on failure, disable its ring instead of
	 * failing the whole startup */
	r = uvd_v2_2_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* initialize the GFX ring, then both DMA rings */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* UVD ring only if uvd_v2_2_resume() above succeeded */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2238
2239 int cayman_resume(struct radeon_device *rdev)
2240 {
2241         int r;
2242
2243         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2244          * posting will perform necessary task to bring back GPU into good
2245          * shape.
2246          */
2247         /* post card */
2248         atom_asic_init(rdev->mode_info.atom_context);
2249
2250         /* init golden registers */
2251         ni_init_golden_registers(rdev);
2252
2253         rdev->accel_working = true;
2254         r = cayman_startup(rdev);
2255         if (r) {
2256                 DRM_ERROR("cayman startup failed on resume\n");
2257                 rdev->accel_working = false;
2258                 return r;
2259         }
2260         return r;
2261 }
2262
/*
 * cayman_suspend - quiesce the ASIC for suspend
 *
 * Stops audio, the VM manager, the CP, the DMA engines and UVD, then
 * suspends interrupts, disables writeback and the GART.  The order
 * matters: engines are stopped before the facilities they depend on
 * are torn down.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2276
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more
 * than call the ASIC-specific functions. This should also allow
 * us to remove a bunch of callback functions like vram_info.
 */
/*
 * cayman_init - one-time driver init for Cayman/TN parts
 *
 * Reads and validates the (ATOM) BIOS, posts the card if needed, sets up
 * clocks, fences, the memory controller, buffer manager and ring objects,
 * then calls cayman_startup().  On startup failure, acceleration is
 * disabled but init still succeeds so the KMS side keeps working.
 * Returns 0 on success, negative error code on fatal failure.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* GFX ring: 1 MB; DMA rings: 64 KB each */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional; only set up its ring if init succeeds */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2388
/**
 * cayman_fini - tear down the driver state for a cayman/TN GPU
 *
 * @rdev: radeon_device pointer
 *
 * Releases everything brought up during init/startup: CP and DMA rings,
 * interrupts, the RLC buffers (IGP parts only), writeback, the VM manager,
 * the IB pool, UVD, the PCIE GART, VRAM scratch, GEM, the fence driver,
 * the buffer-object backend and atombios state, and finally the cached
 * BIOS image.  Teardown runs roughly in the reverse order of setup.
 */
void cayman_fini(struct radeon_device *rdev)
{
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	/* only IGP (TN) parts allocate RLC save/restore buffers */
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD: engine-level fini first, then the common UVD teardown */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* the BIOS copy was kmalloc'ed at init time; drop our reference */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2411
2412 /*
2413  * vm
2414  */
2415 int cayman_vm_init(struct radeon_device *rdev)
2416 {
2417         /* number of VMs */
2418         rdev->vm_manager.nvm = 8;
2419         /* base offset of vram pages */
2420         if (rdev->flags & RADEON_IS_IGP) {
2421                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2422                 tmp <<= 22;
2423                 rdev->vm_manager.vram_base_offset = tmp;
2424         } else
2425                 rdev->vm_manager.vram_base_offset = 0;
2426         return 0;
2427 }
2428
/**
 * cayman_vm_fini - VM teardown callback
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: cayman_vm_init() only programs vm_manager fields
 * and allocates no resources, so there is nothing to release here.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2432
2433 /**
2434  * cayman_vm_decode_fault - print human readable fault info
2435  *
2436  * @rdev: radeon_device pointer
2437  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2438  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2439  *
2440  * Print human readable fault information (cayman/TN).
2441  */
2442 void cayman_vm_decode_fault(struct radeon_device *rdev,
2443                             u32 status, u32 addr)
2444 {
2445         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2446         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2447         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2448         char *block;
2449
2450         switch (mc_id) {
2451         case 32:
2452         case 16:
2453         case 96:
2454         case 80:
2455         case 160:
2456         case 144:
2457         case 224:
2458         case 208:
2459                 block = "CB";
2460                 break;
2461         case 33:
2462         case 17:
2463         case 97:
2464         case 81:
2465         case 161:
2466         case 145:
2467         case 225:
2468         case 209:
2469                 block = "CB_FMASK";
2470                 break;
2471         case 34:
2472         case 18:
2473         case 98:
2474         case 82:
2475         case 162:
2476         case 146:
2477         case 226:
2478         case 210:
2479                 block = "CB_CMASK";
2480                 break;
2481         case 35:
2482         case 19:
2483         case 99:
2484         case 83:
2485         case 163:
2486         case 147:
2487         case 227:
2488         case 211:
2489                 block = "CB_IMMED";
2490                 break;
2491         case 36:
2492         case 20:
2493         case 100:
2494         case 84:
2495         case 164:
2496         case 148:
2497         case 228:
2498         case 212:
2499                 block = "DB";
2500                 break;
2501         case 37:
2502         case 21:
2503         case 101:
2504         case 85:
2505         case 165:
2506         case 149:
2507         case 229:
2508         case 213:
2509                 block = "DB_HTILE";
2510                 break;
2511         case 38:
2512         case 22:
2513         case 102:
2514         case 86:
2515         case 166:
2516         case 150:
2517         case 230:
2518         case 214:
2519                 block = "SX";
2520                 break;
2521         case 39:
2522         case 23:
2523         case 103:
2524         case 87:
2525         case 167:
2526         case 151:
2527         case 231:
2528         case 215:
2529                 block = "DB_STEN";
2530                 break;
2531         case 40:
2532         case 24:
2533         case 104:
2534         case 88:
2535         case 232:
2536         case 216:
2537         case 168:
2538         case 152:
2539                 block = "TC_TFETCH";
2540                 break;
2541         case 41:
2542         case 25:
2543         case 105:
2544         case 89:
2545         case 233:
2546         case 217:
2547         case 169:
2548         case 153:
2549                 block = "TC_VFETCH";
2550                 break;
2551         case 42:
2552         case 26:
2553         case 106:
2554         case 90:
2555         case 234:
2556         case 218:
2557         case 170:
2558         case 154:
2559                 block = "VC";
2560                 break;
2561         case 112:
2562                 block = "CP";
2563                 break;
2564         case 113:
2565         case 114:
2566                 block = "SH";
2567                 break;
2568         case 115:
2569                 block = "VGT";
2570                 break;
2571         case 178:
2572                 block = "IH";
2573                 break;
2574         case 51:
2575                 block = "RLC";
2576                 break;
2577         case 55:
2578                 block = "DMA";
2579                 break;
2580         case 56:
2581                 block = "HDP";
2582                 break;
2583         default:
2584                 block = "unknown";
2585                 break;
2586         }
2587
2588         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2589                protections, vmid, addr,
2590                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2591                block, mc_id);
2592 }
2593
2594 #define R600_ENTRY_VALID   (1 << 0)
2595 #define R600_PTE_SYSTEM    (1 << 1)
2596 #define R600_PTE_SNOOPED   (1 << 2)
2597 #define R600_PTE_READABLE  (1 << 5)
2598 #define R600_PTE_WRITEABLE (1 << 6)
2599
2600 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2601 {
2602         uint32_t r600_flags = 0;
2603         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2604         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2605         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2606         if (flags & RADEON_VM_PAGE_SYSTEM) {
2607                 r600_flags |= R600_PTE_SYSTEM;
2608                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2609         }
2610         return r600_flags;
2611 }
2612
/**
 * cayman_vm_set_page - update the page tables using the CP or DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using either the CP ME_WRITE packet or, when
 * the page-table ring is not the GFX ring, DMA write / PTE_PDE packets
 * (cayman/TN).  Each PTE is two dwords (low/high 32 bits).
 */
void cayman_vm_set_page(struct radeon_device *rdev,
			struct radeon_ib *ib,
			uint64_t pe,
			uint64_t addr, unsigned count,
			uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP path: batch entries into ME_WRITE packets */
		while (count) {
			/* 1 dword header slack + 2 dwords per entry,
			 * capped at the packet's 14-bit count field */
			ndw = 1 + count * 2;
			if (ndw > 0x3FFF)
				ndw = 0x3FFF;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
			/* emit (ndw - 1) / 2 entries, 2 dwords each */
			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					/* system page: translate through the GART */
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA path */
		if ((flags & RADEON_VM_PAGE_SYSTEM) ||
		    (count == 1)) {
			while (count) {
				/* 2 dwords per entry, capped at the
				 * packet's 20-bit count field */
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
			/* pad IB to an 8-dword boundary with NOPs
			 * (NOTE(review): the identical padding loop below runs
			 * after this one and is then a no-op — looks redundant) */
			while (ib->length_dw & 0x7)
				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
		} else {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				pe += ndw * 4;
				addr += (ndw / 2) * incr;
				count -= ndw / 2;
			}
		}
		/* pad IB to an 8-dword boundary with NOPs */
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
	}
}
2718
/**
 * cayman_vm_flush - vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: VM to flush; no-op when NULL
 *
 * Update the page table base and flush the VM TLB
 * using the CP (cayman-si).
 */
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point context vm->id at the VM's page directory (4KB units) */
	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
	radeon_ring_write(ring, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
2749
/**
 * cayman_dma_vm_flush - vm flush using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: VM to flush; no-op when NULL
 *
 * Update the page table base and flush the VM TLB via SRBM register
 * writes issued on the DMA ring (cayman/TN).  Mirrors cayman_vm_flush()
 * but uses DMA_PACKET_SRBM_WRITE instead of CP packets.
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point context vm->id at the VM's page directory (4KB units);
	 * 0xf << 16 is the SRBM byte-enable mask, low bits the dword reg offset */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
2771