/*
 * drivers/gpu/drm/radeon/ni.c (from linux-2.6-block.git)
 * Patch context: "drm/radeon: remove special handling for the DMA ring"
 */
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "nid.h"
32 #include "atom.h"
33 #include "ni_reg.h"
34 #include "cayman_blit_shaders.h"
35 #include "radeon_ucode.h"
36 #include "clearstate_cayman.h"
37
38 static const u32 tn_rlc_save_restore_register_list[] =
39 {
40         0x98fc,
41         0x98f0,
42         0x9834,
43         0x9838,
44         0x9870,
45         0x9874,
46         0x8a14,
47         0x8b24,
48         0x8bcc,
49         0x8b10,
50         0x8c30,
51         0x8d00,
52         0x8d04,
53         0x8c00,
54         0x8c04,
55         0x8c10,
56         0x8c14,
57         0x8d8c,
58         0x8cf0,
59         0x8e38,
60         0x9508,
61         0x9688,
62         0x9608,
63         0x960c,
64         0x9610,
65         0x9614,
66         0x88c4,
67         0x8978,
68         0x88d4,
69         0x900c,
70         0x9100,
71         0x913c,
72         0x90e8,
73         0x9354,
74         0xa008,
75         0x98f8,
76         0x9148,
77         0x914c,
78         0x3f94,
79         0x98f4,
80         0x9b7c,
81         0x3f8c,
82         0x8950,
83         0x8954,
84         0x8a18,
85         0x8b28,
86         0x9144,
87         0x3f90,
88         0x915c,
89         0x9160,
90         0x9178,
91         0x917c,
92         0x9180,
93         0x918c,
94         0x9190,
95         0x9194,
96         0x9198,
97         0x919c,
98         0x91a8,
99         0x91ac,
100         0x91b0,
101         0x91b4,
102         0x91b8,
103         0x91c4,
104         0x91c8,
105         0x91cc,
106         0x91d0,
107         0x91d4,
108         0x91e0,
109         0x91e4,
110         0x91ec,
111         0x91f0,
112         0x91f4,
113         0x9200,
114         0x9204,
115         0x929c,
116         0x8030,
117         0x9150,
118         0x9a60,
119         0x920c,
120         0x9210,
121         0x9228,
122         0x922c,
123         0x9244,
124         0x9248,
125         0x91e8,
126         0x9294,
127         0x9208,
128         0x9224,
129         0x9240,
130         0x9220,
131         0x923c,
132         0x9258,
133         0x9744,
134         0xa200,
135         0xa204,
136         0xa208,
137         0xa20c,
138         0x8d58,
139         0x9030,
140         0x9034,
141         0x9038,
142         0x903c,
143         0x9040,
144         0x9654,
145         0x897c,
146         0xa210,
147         0xa214,
148         0x9868,
149         0xa02c,
150         0x9664,
151         0x9698,
152         0x949c,
153         0x8e10,
154         0x8e18,
155         0x8c50,
156         0x8c58,
157         0x8c60,
158         0x8c68,
159         0x89b4,
160         0x9830,
161         0x802c,
162 };
163
164 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
165 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
166 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
167 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
168 extern int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
169 extern void evergreen_mc_program(struct radeon_device *rdev);
170 extern void evergreen_irq_suspend(struct radeon_device *rdev);
171 extern int evergreen_mc_init(struct radeon_device *rdev);
172 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
173 extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
174 extern void evergreen_program_aspm(struct radeon_device *rdev);
175 extern void sumo_rlc_fini(struct radeon_device *rdev);
176 extern int sumo_rlc_init(struct radeon_device *rdev);
177
178 /* Firmware Names */
179 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
180 MODULE_FIRMWARE("radeon/BARTS_me.bin");
181 MODULE_FIRMWARE("radeon/BARTS_mc.bin");
182 MODULE_FIRMWARE("radeon/BARTS_smc.bin");
183 MODULE_FIRMWARE("radeon/BTC_rlc.bin");
184 MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
185 MODULE_FIRMWARE("radeon/TURKS_me.bin");
186 MODULE_FIRMWARE("radeon/TURKS_mc.bin");
187 MODULE_FIRMWARE("radeon/TURKS_smc.bin");
188 MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
189 MODULE_FIRMWARE("radeon/CAICOS_me.bin");
190 MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
191 MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
192 MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
193 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
194 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
195 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
196 MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
197 MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
198 MODULE_FIRMWARE("radeon/ARUBA_me.bin");
199 MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
200
201
202 static const u32 cayman_golden_registers2[] =
203 {
204         0x3e5c, 0xffffffff, 0x00000000,
205         0x3e48, 0xffffffff, 0x00000000,
206         0x3e4c, 0xffffffff, 0x00000000,
207         0x3e64, 0xffffffff, 0x00000000,
208         0x3e50, 0xffffffff, 0x00000000,
209         0x3e60, 0xffffffff, 0x00000000
210 };
211
212 static const u32 cayman_golden_registers[] =
213 {
214         0x5eb4, 0xffffffff, 0x00000002,
215         0x5e78, 0x8f311ff1, 0x001000f0,
216         0x3f90, 0xffff0000, 0xff000000,
217         0x9148, 0xffff0000, 0xff000000,
218         0x3f94, 0xffff0000, 0xff000000,
219         0x914c, 0xffff0000, 0xff000000,
220         0xc78, 0x00000080, 0x00000080,
221         0xbd4, 0x70073777, 0x00011003,
222         0xd02c, 0xbfffff1f, 0x08421000,
223         0xd0b8, 0x73773777, 0x02011003,
224         0x5bc0, 0x00200000, 0x50100000,
225         0x98f8, 0x33773777, 0x02011003,
226         0x98fc, 0xffffffff, 0x76541032,
227         0x7030, 0x31000311, 0x00000011,
228         0x2f48, 0x33773777, 0x42010001,
229         0x6b28, 0x00000010, 0x00000012,
230         0x7728, 0x00000010, 0x00000012,
231         0x10328, 0x00000010, 0x00000012,
232         0x10f28, 0x00000010, 0x00000012,
233         0x11b28, 0x00000010, 0x00000012,
234         0x12728, 0x00000010, 0x00000012,
235         0x240c, 0x000007ff, 0x00000000,
236         0x8a14, 0xf000001f, 0x00000007,
237         0x8b24, 0x3fff3fff, 0x00ff0fff,
238         0x8b10, 0x0000ff0f, 0x00000000,
239         0x28a4c, 0x07ffffff, 0x06000000,
240         0x10c, 0x00000001, 0x00010003,
241         0xa02c, 0xffffffff, 0x0000009b,
242         0x913c, 0x0000010f, 0x01000100,
243         0x8c04, 0xf8ff00ff, 0x40600060,
244         0x28350, 0x00000f01, 0x00000000,
245         0x9508, 0x3700001f, 0x00000002,
246         0x960c, 0xffffffff, 0x54763210,
247         0x88c4, 0x001f3ae3, 0x00000082,
248         0x88d0, 0xffffffff, 0x0f40df40,
249         0x88d4, 0x0000001f, 0x00000010,
250         0x8974, 0xffffffff, 0x00000000
251 };
252
253 static const u32 dvst_golden_registers2[] =
254 {
255         0x8f8, 0xffffffff, 0,
256         0x8fc, 0x00380000, 0,
257         0x8f8, 0xffffffff, 1,
258         0x8fc, 0x0e000000, 0
259 };
260
261 static const u32 dvst_golden_registers[] =
262 {
263         0x690, 0x3fff3fff, 0x20c00033,
264         0x918c, 0x0fff0fff, 0x00010006,
265         0x91a8, 0x0fff0fff, 0x00010006,
266         0x9150, 0xffffdfff, 0x6e944040,
267         0x917c, 0x0fff0fff, 0x00030002,
268         0x9198, 0x0fff0fff, 0x00030002,
269         0x915c, 0x0fff0fff, 0x00010000,
270         0x3f90, 0xffff0001, 0xff000000,
271         0x9178, 0x0fff0fff, 0x00070000,
272         0x9194, 0x0fff0fff, 0x00070000,
273         0x9148, 0xffff0001, 0xff000000,
274         0x9190, 0x0fff0fff, 0x00090008,
275         0x91ac, 0x0fff0fff, 0x00090008,
276         0x3f94, 0xffff0000, 0xff000000,
277         0x914c, 0xffff0000, 0xff000000,
278         0x929c, 0x00000fff, 0x00000001,
279         0x55e4, 0xff607fff, 0xfc000100,
280         0x8a18, 0xff000fff, 0x00000100,
281         0x8b28, 0xff000fff, 0x00000100,
282         0x9144, 0xfffc0fff, 0x00000100,
283         0x6ed8, 0x00010101, 0x00010000,
284         0x9830, 0xffffffff, 0x00000000,
285         0x9834, 0xf00fffff, 0x00000400,
286         0x9838, 0xfffffffe, 0x00000000,
287         0xd0c0, 0xff000fff, 0x00000100,
288         0xd02c, 0xbfffff1f, 0x08421000,
289         0xd0b8, 0x73773777, 0x12010001,
290         0x5bb0, 0x000000f0, 0x00000070,
291         0x98f8, 0x73773777, 0x12010001,
292         0x98fc, 0xffffffff, 0x00000010,
293         0x9b7c, 0x00ff0000, 0x00fc0000,
294         0x8030, 0x00001f0f, 0x0000100a,
295         0x2f48, 0x73773777, 0x12010001,
296         0x2408, 0x00030000, 0x000c007f,
297         0x8a14, 0xf000003f, 0x00000007,
298         0x8b24, 0x3fff3fff, 0x00ff0fff,
299         0x8b10, 0x0000ff0f, 0x00000000,
300         0x28a4c, 0x07ffffff, 0x06000000,
301         0x4d8, 0x00000fff, 0x00000100,
302         0xa008, 0xffffffff, 0x00010000,
303         0x913c, 0xffff03ff, 0x01000100,
304         0x8c00, 0x000000ff, 0x00000003,
305         0x8c04, 0xf8ff00ff, 0x40600060,
306         0x8cf0, 0x1fff1fff, 0x08e00410,
307         0x28350, 0x00000f01, 0x00000000,
308         0x9508, 0xf700071f, 0x00000002,
309         0x960c, 0xffffffff, 0x54763210,
310         0x20ef8, 0x01ff01ff, 0x00000002,
311         0x20e98, 0xfffffbff, 0x00200000,
312         0x2015c, 0xffffffff, 0x00000f40,
313         0x88c4, 0x001f3ae3, 0x00000082,
314         0x8978, 0x3fffffff, 0x04050140,
315         0x88d4, 0x0000001f, 0x00000010,
316         0x8974, 0xffffffff, 0x00000000
317 };
318
319 static const u32 scrapper_golden_registers[] =
320 {
321         0x690, 0x3fff3fff, 0x20c00033,
322         0x918c, 0x0fff0fff, 0x00010006,
323         0x918c, 0x0fff0fff, 0x00010006,
324         0x91a8, 0x0fff0fff, 0x00010006,
325         0x91a8, 0x0fff0fff, 0x00010006,
326         0x9150, 0xffffdfff, 0x6e944040,
327         0x9150, 0xffffdfff, 0x6e944040,
328         0x917c, 0x0fff0fff, 0x00030002,
329         0x917c, 0x0fff0fff, 0x00030002,
330         0x9198, 0x0fff0fff, 0x00030002,
331         0x9198, 0x0fff0fff, 0x00030002,
332         0x915c, 0x0fff0fff, 0x00010000,
333         0x915c, 0x0fff0fff, 0x00010000,
334         0x3f90, 0xffff0001, 0xff000000,
335         0x3f90, 0xffff0001, 0xff000000,
336         0x9178, 0x0fff0fff, 0x00070000,
337         0x9178, 0x0fff0fff, 0x00070000,
338         0x9194, 0x0fff0fff, 0x00070000,
339         0x9194, 0x0fff0fff, 0x00070000,
340         0x9148, 0xffff0001, 0xff000000,
341         0x9148, 0xffff0001, 0xff000000,
342         0x9190, 0x0fff0fff, 0x00090008,
343         0x9190, 0x0fff0fff, 0x00090008,
344         0x91ac, 0x0fff0fff, 0x00090008,
345         0x91ac, 0x0fff0fff, 0x00090008,
346         0x3f94, 0xffff0000, 0xff000000,
347         0x3f94, 0xffff0000, 0xff000000,
348         0x914c, 0xffff0000, 0xff000000,
349         0x914c, 0xffff0000, 0xff000000,
350         0x929c, 0x00000fff, 0x00000001,
351         0x929c, 0x00000fff, 0x00000001,
352         0x55e4, 0xff607fff, 0xfc000100,
353         0x8a18, 0xff000fff, 0x00000100,
354         0x8a18, 0xff000fff, 0x00000100,
355         0x8b28, 0xff000fff, 0x00000100,
356         0x8b28, 0xff000fff, 0x00000100,
357         0x9144, 0xfffc0fff, 0x00000100,
358         0x9144, 0xfffc0fff, 0x00000100,
359         0x6ed8, 0x00010101, 0x00010000,
360         0x9830, 0xffffffff, 0x00000000,
361         0x9830, 0xffffffff, 0x00000000,
362         0x9834, 0xf00fffff, 0x00000400,
363         0x9834, 0xf00fffff, 0x00000400,
364         0x9838, 0xfffffffe, 0x00000000,
365         0x9838, 0xfffffffe, 0x00000000,
366         0xd0c0, 0xff000fff, 0x00000100,
367         0xd02c, 0xbfffff1f, 0x08421000,
368         0xd02c, 0xbfffff1f, 0x08421000,
369         0xd0b8, 0x73773777, 0x12010001,
370         0xd0b8, 0x73773777, 0x12010001,
371         0x5bb0, 0x000000f0, 0x00000070,
372         0x98f8, 0x73773777, 0x12010001,
373         0x98f8, 0x73773777, 0x12010001,
374         0x98fc, 0xffffffff, 0x00000010,
375         0x98fc, 0xffffffff, 0x00000010,
376         0x9b7c, 0x00ff0000, 0x00fc0000,
377         0x9b7c, 0x00ff0000, 0x00fc0000,
378         0x8030, 0x00001f0f, 0x0000100a,
379         0x8030, 0x00001f0f, 0x0000100a,
380         0x2f48, 0x73773777, 0x12010001,
381         0x2f48, 0x73773777, 0x12010001,
382         0x2408, 0x00030000, 0x000c007f,
383         0x8a14, 0xf000003f, 0x00000007,
384         0x8a14, 0xf000003f, 0x00000007,
385         0x8b24, 0x3fff3fff, 0x00ff0fff,
386         0x8b24, 0x3fff3fff, 0x00ff0fff,
387         0x8b10, 0x0000ff0f, 0x00000000,
388         0x8b10, 0x0000ff0f, 0x00000000,
389         0x28a4c, 0x07ffffff, 0x06000000,
390         0x28a4c, 0x07ffffff, 0x06000000,
391         0x4d8, 0x00000fff, 0x00000100,
392         0x4d8, 0x00000fff, 0x00000100,
393         0xa008, 0xffffffff, 0x00010000,
394         0xa008, 0xffffffff, 0x00010000,
395         0x913c, 0xffff03ff, 0x01000100,
396         0x913c, 0xffff03ff, 0x01000100,
397         0x90e8, 0x001fffff, 0x010400c0,
398         0x8c00, 0x000000ff, 0x00000003,
399         0x8c00, 0x000000ff, 0x00000003,
400         0x8c04, 0xf8ff00ff, 0x40600060,
401         0x8c04, 0xf8ff00ff, 0x40600060,
402         0x8c30, 0x0000000f, 0x00040005,
403         0x8cf0, 0x1fff1fff, 0x08e00410,
404         0x8cf0, 0x1fff1fff, 0x08e00410,
405         0x900c, 0x00ffffff, 0x0017071f,
406         0x28350, 0x00000f01, 0x00000000,
407         0x28350, 0x00000f01, 0x00000000,
408         0x9508, 0xf700071f, 0x00000002,
409         0x9508, 0xf700071f, 0x00000002,
410         0x9688, 0x00300000, 0x0017000f,
411         0x960c, 0xffffffff, 0x54763210,
412         0x960c, 0xffffffff, 0x54763210,
413         0x20ef8, 0x01ff01ff, 0x00000002,
414         0x20e98, 0xfffffbff, 0x00200000,
415         0x2015c, 0xffffffff, 0x00000f40,
416         0x88c4, 0x001f3ae3, 0x00000082,
417         0x88c4, 0x001f3ae3, 0x00000082,
418         0x8978, 0x3fffffff, 0x04050140,
419         0x8978, 0x3fffffff, 0x04050140,
420         0x88d4, 0x0000001f, 0x00000010,
421         0x88d4, 0x0000001f, 0x00000010,
422         0x8974, 0xffffffff, 0x00000000,
423         0x8974, 0xffffffff, 0x00000000
424 };
425
426 static void ni_init_golden_registers(struct radeon_device *rdev)
427 {
428         switch (rdev->family) {
429         case CHIP_CAYMAN:
430                 radeon_program_register_sequence(rdev,
431                                                  cayman_golden_registers,
432                                                  (const u32)ARRAY_SIZE(cayman_golden_registers));
433                 radeon_program_register_sequence(rdev,
434                                                  cayman_golden_registers2,
435                                                  (const u32)ARRAY_SIZE(cayman_golden_registers2));
436                 break;
437         case CHIP_ARUBA:
438                 if ((rdev->pdev->device == 0x9900) ||
439                     (rdev->pdev->device == 0x9901) ||
440                     (rdev->pdev->device == 0x9903) ||
441                     (rdev->pdev->device == 0x9904) ||
442                     (rdev->pdev->device == 0x9905) ||
443                     (rdev->pdev->device == 0x9906) ||
444                     (rdev->pdev->device == 0x9907) ||
445                     (rdev->pdev->device == 0x9908) ||
446                     (rdev->pdev->device == 0x9909) ||
447                     (rdev->pdev->device == 0x990A) ||
448                     (rdev->pdev->device == 0x990B) ||
449                     (rdev->pdev->device == 0x990C) ||
450                     (rdev->pdev->device == 0x990D) ||
451                     (rdev->pdev->device == 0x990E) ||
452                     (rdev->pdev->device == 0x990F) ||
453                     (rdev->pdev->device == 0x9910) ||
454                     (rdev->pdev->device == 0x9913) ||
455                     (rdev->pdev->device == 0x9917) ||
456                     (rdev->pdev->device == 0x9918)) {
457                         radeon_program_register_sequence(rdev,
458                                                          dvst_golden_registers,
459                                                          (const u32)ARRAY_SIZE(dvst_golden_registers));
460                         radeon_program_register_sequence(rdev,
461                                                          dvst_golden_registers2,
462                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
463                 } else {
464                         radeon_program_register_sequence(rdev,
465                                                          scrapper_golden_registers,
466                                                          (const u32)ARRAY_SIZE(scrapper_golden_registers));
467                         radeon_program_register_sequence(rdev,
468                                                          dvst_golden_registers2,
469                                                          (const u32)ARRAY_SIZE(dvst_golden_registers2));
470                 }
471                 break;
472         default:
473                 break;
474         }
475 }
476
477 #define BTC_IO_MC_REGS_SIZE 29
478
479 static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
480         {0x00000077, 0xff010100},
481         {0x00000078, 0x00000000},
482         {0x00000079, 0x00001434},
483         {0x0000007a, 0xcc08ec08},
484         {0x0000007b, 0x00040000},
485         {0x0000007c, 0x000080c0},
486         {0x0000007d, 0x09000000},
487         {0x0000007e, 0x00210404},
488         {0x00000081, 0x08a8e800},
489         {0x00000082, 0x00030444},
490         {0x00000083, 0x00000000},
491         {0x00000085, 0x00000001},
492         {0x00000086, 0x00000002},
493         {0x00000087, 0x48490000},
494         {0x00000088, 0x20244647},
495         {0x00000089, 0x00000005},
496         {0x0000008b, 0x66030000},
497         {0x0000008c, 0x00006603},
498         {0x0000008d, 0x00000100},
499         {0x0000008f, 0x00001c0a},
500         {0x00000090, 0xff000001},
501         {0x00000094, 0x00101101},
502         {0x00000095, 0x00000fff},
503         {0x00000096, 0x00116fff},
504         {0x00000097, 0x60010000},
505         {0x00000098, 0x10010000},
506         {0x00000099, 0x00006000},
507         {0x0000009a, 0x00001000},
508         {0x0000009f, 0x00946a00}
509 };
510
511 static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
512         {0x00000077, 0xff010100},
513         {0x00000078, 0x00000000},
514         {0x00000079, 0x00001434},
515         {0x0000007a, 0xcc08ec08},
516         {0x0000007b, 0x00040000},
517         {0x0000007c, 0x000080c0},
518         {0x0000007d, 0x09000000},
519         {0x0000007e, 0x00210404},
520         {0x00000081, 0x08a8e800},
521         {0x00000082, 0x00030444},
522         {0x00000083, 0x00000000},
523         {0x00000085, 0x00000001},
524         {0x00000086, 0x00000002},
525         {0x00000087, 0x48490000},
526         {0x00000088, 0x20244647},
527         {0x00000089, 0x00000005},
528         {0x0000008b, 0x66030000},
529         {0x0000008c, 0x00006603},
530         {0x0000008d, 0x00000100},
531         {0x0000008f, 0x00001c0a},
532         {0x00000090, 0xff000001},
533         {0x00000094, 0x00101101},
534         {0x00000095, 0x00000fff},
535         {0x00000096, 0x00116fff},
536         {0x00000097, 0x60010000},
537         {0x00000098, 0x10010000},
538         {0x00000099, 0x00006000},
539         {0x0000009a, 0x00001000},
540         {0x0000009f, 0x00936a00}
541 };
542
543 static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
544         {0x00000077, 0xff010100},
545         {0x00000078, 0x00000000},
546         {0x00000079, 0x00001434},
547         {0x0000007a, 0xcc08ec08},
548         {0x0000007b, 0x00040000},
549         {0x0000007c, 0x000080c0},
550         {0x0000007d, 0x09000000},
551         {0x0000007e, 0x00210404},
552         {0x00000081, 0x08a8e800},
553         {0x00000082, 0x00030444},
554         {0x00000083, 0x00000000},
555         {0x00000085, 0x00000001},
556         {0x00000086, 0x00000002},
557         {0x00000087, 0x48490000},
558         {0x00000088, 0x20244647},
559         {0x00000089, 0x00000005},
560         {0x0000008b, 0x66030000},
561         {0x0000008c, 0x00006603},
562         {0x0000008d, 0x00000100},
563         {0x0000008f, 0x00001c0a},
564         {0x00000090, 0xff000001},
565         {0x00000094, 0x00101101},
566         {0x00000095, 0x00000fff},
567         {0x00000096, 0x00116fff},
568         {0x00000097, 0x60010000},
569         {0x00000098, 0x10010000},
570         {0x00000099, 0x00006000},
571         {0x0000009a, 0x00001000},
572         {0x0000009f, 0x00916a00}
573 };
574
575 static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
576         {0x00000077, 0xff010100},
577         {0x00000078, 0x00000000},
578         {0x00000079, 0x00001434},
579         {0x0000007a, 0xcc08ec08},
580         {0x0000007b, 0x00040000},
581         {0x0000007c, 0x000080c0},
582         {0x0000007d, 0x09000000},
583         {0x0000007e, 0x00210404},
584         {0x00000081, 0x08a8e800},
585         {0x00000082, 0x00030444},
586         {0x00000083, 0x00000000},
587         {0x00000085, 0x00000001},
588         {0x00000086, 0x00000002},
589         {0x00000087, 0x48490000},
590         {0x00000088, 0x20244647},
591         {0x00000089, 0x00000005},
592         {0x0000008b, 0x66030000},
593         {0x0000008c, 0x00006603},
594         {0x0000008d, 0x00000100},
595         {0x0000008f, 0x00001c0a},
596         {0x00000090, 0xff000001},
597         {0x00000094, 0x00101101},
598         {0x00000095, 0x00000fff},
599         {0x00000096, 0x00116fff},
600         {0x00000097, 0x60010000},
601         {0x00000098, 0x10010000},
602         {0x00000099, 0x00006000},
603         {0x0000009a, 0x00001000},
604         {0x0000009f, 0x00976b00}
605 };
606
607 int ni_mc_load_microcode(struct radeon_device *rdev)
608 {
609         const __be32 *fw_data;
610         u32 mem_type, running, blackout = 0;
611         u32 *io_mc_regs;
612         int i, ucode_size, regs_size;
613
614         if (!rdev->mc_fw)
615                 return -EINVAL;
616
617         switch (rdev->family) {
618         case CHIP_BARTS:
619                 io_mc_regs = (u32 *)&barts_io_mc_regs;
620                 ucode_size = BTC_MC_UCODE_SIZE;
621                 regs_size = BTC_IO_MC_REGS_SIZE;
622                 break;
623         case CHIP_TURKS:
624                 io_mc_regs = (u32 *)&turks_io_mc_regs;
625                 ucode_size = BTC_MC_UCODE_SIZE;
626                 regs_size = BTC_IO_MC_REGS_SIZE;
627                 break;
628         case CHIP_CAICOS:
629         default:
630                 io_mc_regs = (u32 *)&caicos_io_mc_regs;
631                 ucode_size = BTC_MC_UCODE_SIZE;
632                 regs_size = BTC_IO_MC_REGS_SIZE;
633                 break;
634         case CHIP_CAYMAN:
635                 io_mc_regs = (u32 *)&cayman_io_mc_regs;
636                 ucode_size = CAYMAN_MC_UCODE_SIZE;
637                 regs_size = BTC_IO_MC_REGS_SIZE;
638                 break;
639         }
640
641         mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
642         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
643
644         if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
645                 if (running) {
646                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
647                         WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
648                 }
649
650                 /* reset the engine and set to writable */
651                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
652                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
653
654                 /* load mc io regs */
655                 for (i = 0; i < regs_size; i++) {
656                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
657                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
658                 }
659                 /* load the MC ucode */
660                 fw_data = (const __be32 *)rdev->mc_fw->data;
661                 for (i = 0; i < ucode_size; i++)
662                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
663
664                 /* put the engine back into the active state */
665                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
666                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
667                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
668
669                 /* wait for training to complete */
670                 for (i = 0; i < rdev->usec_timeout; i++) {
671                         if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
672                                 break;
673                         udelay(1);
674                 }
675
676                 if (running)
677                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
678         }
679
680         return 0;
681 }
682
683 int ni_init_microcode(struct radeon_device *rdev)
684 {
685         const char *chip_name;
686         const char *rlc_chip_name;
687         size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
688         size_t smc_req_size = 0;
689         char fw_name[30];
690         int err;
691
692         DRM_DEBUG("\n");
693
694         switch (rdev->family) {
695         case CHIP_BARTS:
696                 chip_name = "BARTS";
697                 rlc_chip_name = "BTC";
698                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
699                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
700                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
701                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
702                 smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
703                 break;
704         case CHIP_TURKS:
705                 chip_name = "TURKS";
706                 rlc_chip_name = "BTC";
707                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
708                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
709                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
710                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
711                 smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
712                 break;
713         case CHIP_CAICOS:
714                 chip_name = "CAICOS";
715                 rlc_chip_name = "BTC";
716                 pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
717                 me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
718                 rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
719                 mc_req_size = BTC_MC_UCODE_SIZE * 4;
720                 smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
721                 break;
722         case CHIP_CAYMAN:
723                 chip_name = "CAYMAN";
724                 rlc_chip_name = "CAYMAN";
725                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
726                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
727                 rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
728                 mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
729                 smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
730                 break;
731         case CHIP_ARUBA:
732                 chip_name = "ARUBA";
733                 rlc_chip_name = "ARUBA";
734                 /* pfp/me same size as CAYMAN */
735                 pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
736                 me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
737                 rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
738                 mc_req_size = 0;
739                 break;
740         default: BUG();
741         }
742
743         DRM_INFO("Loading %s Microcode\n", chip_name);
744
745         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
746         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
747         if (err)
748                 goto out;
749         if (rdev->pfp_fw->size != pfp_req_size) {
750                 printk(KERN_ERR
751                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
752                        rdev->pfp_fw->size, fw_name);
753                 err = -EINVAL;
754                 goto out;
755         }
756
757         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
758         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
759         if (err)
760                 goto out;
761         if (rdev->me_fw->size != me_req_size) {
762                 printk(KERN_ERR
763                        "ni_cp: Bogus length %zu in firmware \"%s\"\n",
764                        rdev->me_fw->size, fw_name);
765                 err = -EINVAL;
766         }
767
768         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
769         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
770         if (err)
771                 goto out;
772         if (rdev->rlc_fw->size != rlc_req_size) {
773                 printk(KERN_ERR
774                        "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
775                        rdev->rlc_fw->size, fw_name);
776                 err = -EINVAL;
777         }
778
779         /* no MC ucode on TN */
780         if (!(rdev->flags & RADEON_IS_IGP)) {
781                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
782                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
783                 if (err)
784                         goto out;
785                 if (rdev->mc_fw->size != mc_req_size) {
786                         printk(KERN_ERR
787                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
788                                rdev->mc_fw->size, fw_name);
789                         err = -EINVAL;
790                 }
791         }
792
793         if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
794                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
795                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
796                 if (err) {
797                         printk(KERN_ERR
798                                "smc: error loading firmware \"%s\"\n",
799                                fw_name);
800                         release_firmware(rdev->smc_fw);
801                         rdev->smc_fw = NULL;
802                 } else if (rdev->smc_fw->size != smc_req_size) {
803                         printk(KERN_ERR
804                                "ni_mc: Bogus length %zu in firmware \"%s\"\n",
805                                rdev->mc_fw->size, fw_name);
806                         err = -EINVAL;
807                 }
808         }
809
810 out:
811         if (err) {
812                 if (err != -EINVAL)
813                         printk(KERN_ERR
814                                "ni_cp: Failed to load firmware \"%s\"\n",
815                                fw_name);
816                 release_firmware(rdev->pfp_fw);
817                 rdev->pfp_fw = NULL;
818                 release_firmware(rdev->me_fw);
819                 rdev->me_fw = NULL;
820                 release_firmware(rdev->rlc_fw);
821                 rdev->rlc_fw = NULL;
822                 release_firmware(rdev->mc_fw);
823                 rdev->mc_fw = NULL;
824         }
825         return err;
826 }
827
828 int tn_get_temp(struct radeon_device *rdev)
829 {
830         u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
831         int actual_temp = (temp / 8) - 49;
832
833         return actual_temp * 1000;
834 }
835
836 /*
837  * Core functions
838  */
/**
 * cayman_gpu_init - program the 3D engine golden/default state
 * @rdev: radeon_device pointer
 *
 * Fills in rdev->config.cayman with the per-ASIC resource limits,
 * derives the tiling/backend configuration from GB_ADDR_CONFIG and
 * MC_ARB_RAMCFG, remaps the render backends around any disabled RBs,
 * and applies the HW defaults the 3D engine needs before the CP is
 * started.  Called once from the ASIC startup path; the register
 * write order below is significant and must not be changed.
 */
static void cayman_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 cgts_tcc_disable;
	u32 sx_debug_1;
	u32 smx_dc_ctl0;
	u32 cgts_sm_ctrl_reg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	u32 disabled_rb_mask;
	int i, j;

	/* per-family shader/pipe/FIFO limits */
	switch (rdev->family) {
	case CHIP_CAYMAN:
		rdev->config.cayman.max_shader_engines = 2;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 8;
		rdev->config.cayman.max_simds_per_se = 12;
		rdev->config.cayman.max_backends_per_se = 4;
		rdev->config.cayman.max_texture_channel_caches = 8;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;

		rdev->config.cayman.sc_prim_fifo_size = 0x100;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_ARUBA:
	default:
		rdev->config.cayman.max_shader_engines = 1;
		rdev->config.cayman.max_pipes_per_simd = 4;
		rdev->config.cayman.max_tile_pipes = 2;
		/* SIMD/backend counts vary per TN/RL SKU; keyed off PCI device id */
		if ((rdev->pdev->device == 0x9900) ||
		    (rdev->pdev->device == 0x9901) ||
		    (rdev->pdev->device == 0x9905) ||
		    (rdev->pdev->device == 0x9906) ||
		    (rdev->pdev->device == 0x9907) ||
		    (rdev->pdev->device == 0x9908) ||
		    (rdev->pdev->device == 0x9909) ||
		    (rdev->pdev->device == 0x990B) ||
		    (rdev->pdev->device == 0x990C) ||
		    (rdev->pdev->device == 0x990F) ||
		    (rdev->pdev->device == 0x9910) ||
		    (rdev->pdev->device == 0x9917) ||
		    (rdev->pdev->device == 0x9999) ||
		    (rdev->pdev->device == 0x999C)) {
			rdev->config.cayman.max_simds_per_se = 6;
			rdev->config.cayman.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x9903) ||
			   (rdev->pdev->device == 0x9904) ||
			   (rdev->pdev->device == 0x990A) ||
			   (rdev->pdev->device == 0x990D) ||
			   (rdev->pdev->device == 0x990E) ||
			   (rdev->pdev->device == 0x9913) ||
			   (rdev->pdev->device == 0x9918) ||
			   (rdev->pdev->device == 0x999D)) {
			rdev->config.cayman.max_simds_per_se = 4;
			rdev->config.cayman.max_backends_per_se = 2;
		} else if ((rdev->pdev->device == 0x9919) ||
			   (rdev->pdev->device == 0x9990) ||
			   (rdev->pdev->device == 0x9991) ||
			   (rdev->pdev->device == 0x9994) ||
			   (rdev->pdev->device == 0x9995) ||
			   (rdev->pdev->device == 0x9996) ||
			   (rdev->pdev->device == 0x999A) ||
			   (rdev->pdev->device == 0x99A0)) {
			rdev->config.cayman.max_simds_per_se = 3;
			rdev->config.cayman.max_backends_per_se = 1;
		} else {
			rdev->config.cayman.max_simds_per_se = 2;
			rdev->config.cayman.max_backends_per_se = 1;
		}
		rdev->config.cayman.max_texture_channel_caches = 2;
		rdev->config.cayman.max_gprs = 256;
		rdev->config.cayman.max_threads = 256;
		rdev->config.cayman.max_gs_threads = 32;
		rdev->config.cayman.max_stack_entries = 512;
		rdev->config.cayman.sx_num_of_sets = 8;
		rdev->config.cayman.sx_max_export_size = 256;
		rdev->config.cayman.sx_max_export_pos_size = 64;
		rdev->config.cayman.sx_max_export_smx_size = 192;
		rdev->config.cayman.max_hw_contexts = 8;
		rdev->config.cayman.sq_num_cf_insts = 2;

		rdev->config.cayman.sc_prim_fifo_size = 0x40;
		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 have no named defines here; presumably
	 * the per-instance HDP tiling registers, zeroed for all 32 instances.
	 */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* derive DRAM row size from the number of columns, capped at 4KB */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cayman.mem_row_size_in_kb > 4)
		rdev->config.cayman.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cayman.shader_engine_tile_size = 32;
	rdev->config.cayman.num_gpus = 1;
	rdev->config.cayman.multi_gpu_tile_size = 64;

	/* decode the golden GB_ADDR_CONFIG back into config fields
	 * (overrides the defaults set just above)
	 */
	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
	rdev->config.cayman.num_tile_pipes = (1 << tmp);
	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
	rdev->config.cayman.num_shader_engines = tmp + 1;
	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
	rdev->config.cayman.num_gpus = tmp + 1;
	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;


	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cayman.tile_config = 0;
	switch (rdev->config.cayman.num_tile_pipes) {
	case 1:
	default:
		rdev->config.cayman.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cayman.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cayman.tile_config |= (2 << 0);
		break;
	case 8:
		rdev->config.cayman.tile_config |= (3 << 0);
		break;
	}

	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
	if (rdev->flags & RADEON_IS_IGP)
		rdev->config.cayman.tile_config |= 1 << 4;
	else {
		switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
		case 0: /* four banks */
			rdev->config.cayman.tile_config |= 0 << 4;
			break;
		case 1: /* eight banks */
			rdev->config.cayman.tile_config |= 1 << 4;
			break;
		case 2: /* sixteen banks */
		default:
			rdev->config.cayman.tile_config |= 2 << 4;
			break;
		}
	}
	rdev->config.cayman.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cayman.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* collect the per-SE RB disable bitmaps (4 bits per SE,
	 * highest SE in the lowest nibble after the shifts)
	 */
	tmp = 0;
	for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
		u32 rb_disable_bitmap;

		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
		rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
		tmp <<= 4;
		tmp |= rb_disable_bitmap;
	}
	/* enabled rb are just the one not disabled :) */
	disabled_rb_mask = tmp;
	tmp = 0;
	for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
		tmp |= (1 << i);
	/* if all the backends are disabled, fix it up here */
	if ((disabled_rb_mask & tmp) == tmp) {
		for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
			disabled_rb_mask &= ~(1 << i);
	}

	/* restore broadcast addressing after the per-SE reads above */
	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);

	/* the same address config is mirrored into every client that
	 * touches memory (display, HDP, both DMA engines, UVD)
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	if (ASIC_IS_DCE6(rdev))
		WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	/* single-RB IGPs get a hand-built backend map; everything else
	 * goes through the generic remap helper
	 */
	if ((rdev->config.cayman.max_backends_per_se == 1) &&
	    (rdev->flags & RADEON_IS_IGP)) {
		if ((disabled_rb_mask & 3) == 1) {
			/* RB0 disabled, RB1 enabled */
			tmp = 0x11111111;
		} else {
			/* RB1 disabled, RB0 enabled */
			tmp = 0x00000000;
		}
	} else {
		tmp = gb_addr_config & NUM_PIPES_MASK;
		tmp = r6xx_remap_render_backend(rdev, tmp,
						rdev->config.cayman.max_backends_per_se *
						rdev->config.cayman.max_shader_engines,
						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
	}
	WREG32(GB_BACKEND_MAP, tmp);

	/* enable one texture channel cache per max_texture_channel_caches;
	 * disable bits live in the upper 16 bits
	 */
	cgts_tcc_disable = 0xffff0000;
	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
		cgts_tcc_disable &= ~(1 << (16 + i));
	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);

	/* reprogram the shader complex */
	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
	for (i = 0; i < 16; i++)
		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	sx_debug_1 = RREG32(SX_DEBUG_1);
	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
	WREG32(SX_DEBUG_1, sx_debug_1);

	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
	WREG32(SMX_DC_CTL0, smx_dc_ctl0);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);

	/* need to be explicitly zero-ed */
	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
	WREG32(SQ_LSTMP_RING_BASE, 0);
	WREG32(SQ_HSTMP_RING_BASE, 0);
	WREG32(SQ_ESTMP_RING_BASE, 0);
	WREG32(SQ_GSTMP_RING_BASE, 0);
	WREG32(SQ_VSTMP_RING_BASE, 0);
	WREG32(SQ_PSTMP_RING_BASE, 0);

	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);

	/* export buffer sizes are programmed in units of 4 entries */
	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));

	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));


	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
				  FETCH_FIFO_HIWATER(0x4) |
				  DONE_FIFO_HIWATER(0xe0) |
				  ALU_UPDATE_FIFO_HIWATER(0x8)));

	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
	WREG32(SQ_CONFIG, (VC_ENABLE |
			   EXPORT_SRC_C |
			   GFX_PRIO(0) |
			   CS1_PRIO(0) |
			   CS2_PRIO(1)));
	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* zero the CB performance counter selects */
	WREG32(CB_PERF_CTR0_SEL_0, 0);
	WREG32(CB_PERF_CTR0_SEL_1, 0);
	WREG32(CB_PERF_CTR1_SEL_0, 0);
	WREG32(CB_PERF_CTR1_SEL_1, 0);
	WREG32(CB_PERF_CTR2_SEL_0, 0);
	WREG32(CB_PERF_CTR2_SEL_1, 0);
	WREG32(CB_PERF_CTR3_SEL_0, 0);
	WREG32(CB_PERF_CTR3_SEL_1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* read-modify-write with no modification: presumably just a
	 * posting/latch of the host path config -- TODO confirm
	 */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);

	/* set clockgating golden values on TN */
	if (rdev->family == CHIP_ARUBA) {
		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
		tmp &= ~0x00380000;
		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
		tmp = RREG32_CG(CG_CGTT_LOCAL_1);
		tmp &= ~0x0e000000;
		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
	}
}
1182
1183 /*
1184  * GART
1185  */
/**
 * cayman_pcie_gart_tlb_flush - flush the HDP cache and VM TLBs
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache first so any CPU writes through the HDP
 * aperture are visible, then requests a TLB invalidate for VM
 * context 0 (bit 0 of VM_INVALIDATE_REQUEST).
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1194
/**
 * cayman_pcie_gart_enable - pin the GART table and enable the VM
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache
 * controls, sets up VM context 0 to cover the GTT aperture, points
 * contexts 1-7 at the same table as placeholders (real per-VM page
 * tables are assigned on the fly by radeon_gart.c), enables fault
 * reporting on those contexts, and finally flushes the TLBs.
 *
 * Returns 0 on success, negative error code on failure (no GART
 * object, or the table could not be pinned).
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	/* NOTE(review): (0xA << 7) has no named define here; presumably the
	 * L1 TLB fragment/page size field -- confirm against the register spec.
	 */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* NOTE(review): 0x15D4/0x15D8/0x15DC have no named defines in this
	 * file; zeroed here as part of VM bring-up -- confirm against nid.h.
	 */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1275
/**
 * cayman_pcie_gart_disable - disable the VM and unpin the GART table
 * @rdev: radeon_device pointer
 *
 * Disables all VM contexts, reprograms the L1 TLB and L2 cache
 * controls without their enable bits, and unpins the page table
 * from VRAM.  Inverse of cayman_pcie_gart_enable().
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1295
/**
 * cayman_pcie_gart_fini - final GART teardown
 * @rdev: radeon_device pointer
 *
 * Disables the VM, frees the page table VRAM and releases the
 * generic GART state.  Order matters: disable before free.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1302
1303 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
1304                               int ring, u32 cp_int_cntl)
1305 {
1306         u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
1307
1308         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
1309         WREG32(CP_INT_CNTL, cp_int_cntl);
1310 }
1311
1312 /*
1313  * CP.
1314  */
/**
 * cayman_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a surface sync to flush the read caches over GART, then an
 * EVENT_WRITE_EOP packet that writes the fence sequence number to
 * the fence driver address and raises an interrupt.  The packet
 * contents and order are fixed by the CP microcode.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1)=write 32bit fence seq, INT_SEL(2)=irq after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1338
/**
 * cayman_ring_ib_execute - emit an indirect buffer on a gfx/compute ring
 * @rdev: radeon_device pointer
 * @ib: IB to schedule
 *
 * Switches the CP to DX10/11 packet mode, optionally records the
 * post-IB read pointer in the ring's scratch register, emits the
 * INDIRECT_BUFFER packet with the IB's VM id, and finishes with a
 * surface sync to flush the read caches for that vmid.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 + 4 + 8 = dwords emitted by this function after this
		 * point; the saved value is where rptr will be once the
		 * whole sequence has been consumed */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg - 
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	/* vm id rides in bits 31:24 of the length dword */
	radeon_ring_write(ring, ib->length_dw | 
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
}
1375
/**
 * cayman_uvd_semaphore_emit - emit a semaphore command on the UVD ring
 * @rdev: radeon_device pointer
 * @ring: UVD ring
 * @semaphore: semaphore object
 * @emit_wait: true to emit a wait, false to emit a signal
 *
 * The 8-byte-aligned semaphore address is programmed as two 20-bit
 * halves (bits 22:3 and 42:23), followed by the command word.
 */
void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
			       struct radeon_ring *ring,
			       struct radeon_semaphore *semaphore,
			       bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);

	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);

	/* bit 0 selects wait vs signal; NOTE(review): 0x80 presumably the
	 * semaphore-enable bit of UVD_SEMA_CMD -- confirm against UVD docs */
	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
}
1392
1393 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1394 {
1395         if (enable)
1396                 WREG32(CP_ME_CNTL, 0);
1397         else {
1398                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1399                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1400                 WREG32(SCRATCH_UMSK, 0);
1401                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1402         }
1403 }
1404
1405 static int cayman_cp_load_microcode(struct radeon_device *rdev)
1406 {
1407         const __be32 *fw_data;
1408         int i;
1409
1410         if (!rdev->me_fw || !rdev->pfp_fw)
1411                 return -EINVAL;
1412
1413         cayman_cp_enable(rdev, false);
1414
1415         fw_data = (const __be32 *)rdev->pfp_fw->data;
1416         WREG32(CP_PFP_UCODE_ADDR, 0);
1417         for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
1418                 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
1419         WREG32(CP_PFP_UCODE_ADDR, 0);
1420
1421         fw_data = (const __be32 *)rdev->me_fw->data;
1422         WREG32(CP_ME_RAM_WADDR, 0);
1423         for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
1424                 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
1425
1426         WREG32(CP_PFP_UCODE_ADDR, 0);
1427         WREG32(CP_ME_RAM_WADDR, 0);
1428         WREG32(CP_ME_RAM_RADDR, 0);
1429         return 0;
1430 }
1431
/**
 * cayman_cp_start - initialize the CP and emit the clear state
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE on the gfx ring, enables the CP, then emits
 * the golden register state (cayman_default_state) bracketed by
 * clear-state preamble packets so the 3D engine starts from a known
 * context.  Secondary CP rings are not initialized here yet.
 *
 * Returns 0 on success, negative error code if the ring could not
 * be locked.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* 19 = fixed packet overhead around the default state below */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /*  */

	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
1497
/**
 * cayman_cp_fini - tear down the gfx CP ring
 * @rdev: radeon_device pointer
 *
 * Halts the CP, releases the gfx ring buffer and frees the scratch
 * register used for rptr writeback.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1505
/* cayman_cp_resume - soft reset and restart the three CP rings.
 *
 * Soft resets the CP block (plus the units that must be reset along
 * with it), programs ring buffer size, rptr-writeback address and base
 * address for CP rings 0-2, restarts the rings via cayman_cp_start()
 * and ring-tests cp0.  Returns 0 on success, a negative error code if
 * the ring test fails.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register tables; all indexed in step with ridx[] */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* NOTE(review): magic debug bit 27 — meaning not visible here,
	 * carried over from the original bring-up sequence */
	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = drm_order(ring->ring_size / 8);
		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		/* temporarily allow software writes to the rptr register
		 * so it can be zeroed; cleared again below */
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	/* cp1/cp2 are programmed above but left not-ready until tested */
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only test cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1614
1615 /*
1616  * DMA
1617  * Starting with R600, the GPU has an asynchronous
1618  * DMA engine.  The programming model is very similar
1619  * to the 3D engine (ring buffer, IBs, etc.), but the
1620  * DMA controller has it's own packet format that is
1621  * different form the PM4 format used by the 3D engine.
1622  * It supports copying data, writing embedded data,
1623  * solid fills, and a number of other things.  It also
1624  * has support for tiling/detiling of buffers.
1625  * Cayman and newer support two asynchronous DMA engines.
1626  */
/**
 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (cayman-SI).
 */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		/* Predict what wptr will be after the 4-dword write packet
		 * below, the NOP padding and the 3-dword IB packet (which
		 * must end on an 8-dword boundary — see the padding loop
		 * further down), and emit a DMA write of that value to the
		 * next_rptr writeback slot.
		 */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	/* vm id 0 is used when the IB is not tied to a VM context */
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}
1661
1662 /**
1663  * cayman_dma_stop - stop the async dma engines
1664  *
1665  * @rdev: radeon_device pointer
1666  *
1667  * Stop the async dma engines (cayman-SI).
1668  */
1669 void cayman_dma_stop(struct radeon_device *rdev)
1670 {
1671         u32 rb_cntl;
1672
1673         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1674
1675         /* dma0 */
1676         rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1677         rb_cntl &= ~DMA_RB_ENABLE;
1678         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1679
1680         /* dma1 */
1681         rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1682         rb_cntl &= ~DMA_RB_ENABLE;
1683         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1684
1685         rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1686         rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1687 }
1688
/**
 * cayman_dma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA ring buffers and enable them. (cayman-SI).
 * Soft resets both engines, then for each one programs ring size,
 * read/write pointers, the rptr writeback address and the ring base,
 * enables IBs and the ring, and ring-tests it.
 * Returns 0 for success, error for failure.
 */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	/* i == 0: dma0, i == 1: dma1; same programming, different
	 * register bank and writeback slot */
	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		/* the writeback-enable bit only reaches the hardware in the
		 * final DMA_RB_CNTL write below */
		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		/* mask the context-empty interrupt */
		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		/* everything is programmed; turn the ring on */
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
1779
1780 /**
1781  * cayman_dma_fini - tear down the async dma engines
1782  *
1783  * @rdev: radeon_device pointer
1784  *
1785  * Stop the async dma engines and free the rings (cayman-SI).
1786  */
1787 void cayman_dma_fini(struct radeon_device *rdev)
1788 {
1789         cayman_dma_stop(rdev);
1790         radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1791         radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1792 }
1793
/* Inspect the status registers and build a mask of RADEON_RESET_* bits
 * for every block that currently looks busy/hung and would need a soft
 * reset.  A return value of 0 means the GPU appears idle.  A busy MC is
 * deliberately filtered out at the end — it is most likely just busy,
 * not hung.
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1871
/* Soft reset the blocks selected by @reset_mask (RADEON_RESET_* bits).
 *
 * Dumps the VM fault registers, halts the CP and the affected DMA ring
 * buffers, stops the MC, translates the mask into GRBM/SRBM soft-reset
 * bits, pulses those reset registers, and finally restores the MC
 * state.  A zero mask is a no-op.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce the memory controller while reset is applied */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the generic reset mask into GRBM/SRBM reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* IGP parts have no separate MC to reset */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the reset bits: set, read back, wait, clear, read back */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
2003
2004 int cayman_asic_reset(struct radeon_device *rdev)
2005 {
2006         u32 reset_mask;
2007
2008         reset_mask = cayman_gpu_check_soft_reset(rdev);
2009
2010         if (reset_mask)
2011                 r600_set_bios_scratch_engine_hung(rdev, true);
2012
2013         cayman_gpu_soft_reset(rdev, reset_mask);
2014
2015         reset_mask = cayman_gpu_check_soft_reset(rdev);
2016
2017         if (!reset_mask)
2018                 r600_set_bios_scratch_engine_hung(rdev, false);
2019
2020         return 0;
2021 }
2022
2023 /**
2024  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
2025  *
2026  * @rdev: radeon_device pointer
2027  * @ring: radeon_ring structure holding ring information
2028  *
2029  * Check if the GFX engine is locked up.
2030  * Returns true if the engine appears to be locked up, false if not.
2031  */
2032 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2033 {
2034         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2035
2036         if (!(reset_mask & (RADEON_RESET_GFX |
2037                             RADEON_RESET_COMPUTE |
2038                             RADEON_RESET_CP))) {
2039                 radeon_ring_lockup_update(ring);
2040                 return false;
2041         }
2042         /* force CP activities */
2043         radeon_ring_force_activity(rdev, ring);
2044         return radeon_ring_test_lockup(rdev, ring);
2045 }
2046
2047 /**
2048  * cayman_dma_is_lockup - Check if the DMA engine is locked up
2049  *
2050  * @rdev: radeon_device pointer
2051  * @ring: radeon_ring structure holding ring information
2052  *
2053  * Check if the async DMA engine is locked up.
2054  * Returns true if the engine appears to be locked up, false if not.
2055  */
2056 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2057 {
2058         u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2059         u32 mask;
2060
2061         if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2062                 mask = RADEON_RESET_DMA;
2063         else
2064                 mask = RADEON_RESET_DMA1;
2065
2066         if (!(reset_mask & mask)) {
2067                 radeon_ring_lockup_update(ring);
2068                 return false;
2069         }
2070         /* force ring activities */
2071         radeon_ring_force_activity(rdev, ring);
2072         return radeon_ring_test_lockup(rdev, ring);
2073 }
2074
/* Bring the asic up: program the MC, load microcode, set up GART, RLC,
 * writeback, fences, interrupts, all rings (gfx, 2x compute, 2x DMA,
 * optionally UVD), and finally the IB pool, VM manager and audio.
 * Order matters throughout.  Returns 0 on success, error code on the
 * first failure (UVD failure is non-fatal and only disables its ring).
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);
	/* enable aspm */
	evergreen_program_aspm(rdev);

	evergreen_mc_program(rdev);

	/* load ucode only if not already fetched; IGP parts have no
	 * separate MC firmware */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
		rdev->rlc.cs_data = cayman_cs_data;
		r = sumo_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring we will use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD is optional: a failure here only disables the UVD ring */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	/* allocate and initialize the ring buffers themselves */
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size is zero when UVD resume failed above */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev, true);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2255
2256 int cayman_resume(struct radeon_device *rdev)
2257 {
2258         int r;
2259
2260         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2261          * posting will perform necessary task to bring back GPU into good
2262          * shape.
2263          */
2264         /* post card */
2265         atom_asic_init(rdev->mode_info.atom_context);
2266
2267         /* init golden registers */
2268         ni_init_golden_registers(rdev);
2269
2270         rdev->accel_working = true;
2271         r = cayman_startup(rdev);
2272         if (r) {
2273                 DRM_ERROR("cayman startup failed on resume\n");
2274                 rdev->accel_working = false;
2275                 return r;
2276         }
2277         return r;
2278 }
2279
/* Shut the asic down for suspend.  Ordering matters: first stop the
 * hardware's clients (audio, VM manager), then halt the engines
 * (CP, DMA, UVD), and finally tear down interrupts, writeback and the
 * GART.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2293
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic specific functions. This
 * should also allow removing a bunch of callback functions
 * like vram_info.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* GFX ring: 1MB */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* first async DMA ring: 64KB */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* second async DMA ring: 64KB */
	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional: a failure here only skips UVD ring setup */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	/* interrupt handler ring: 64KB */
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* Acceleration failed: tear down accel state but keep the
		 * device usable for modesetting, hence no error return here.
		 */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2405
/* Tear down everything cayman_init()/cayman_startup() set up,
 * roughly in the reverse order of initialization.
 */
void cayman_fini(struct radeon_device *rdev)
{
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	/* the RLC save/restore buffers only exist on IGPs (TN) */
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2428
2429 /*
2430  * vm
2431  */
2432 int cayman_vm_init(struct radeon_device *rdev)
2433 {
2434         /* number of VMs */
2435         rdev->vm_manager.nvm = 8;
2436         /* base offset of vram pages */
2437         if (rdev->flags & RADEON_IS_IGP) {
2438                 u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2439                 tmp <<= 22;
2440                 rdev->vm_manager.vram_base_offset = tmp;
2441         } else
2442                 rdev->vm_manager.vram_base_offset = 0;
2443         return 0;
2444 }
2445
/* Nothing to tear down: cayman_vm_init() allocates no resources. */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2449
2450 /**
2451  * cayman_vm_decode_fault - print human readable fault info
2452  *
2453  * @rdev: radeon_device pointer
2454  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2455  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2456  *
2457  * Print human readable fault information (cayman/TN).
2458  */
2459 void cayman_vm_decode_fault(struct radeon_device *rdev,
2460                             u32 status, u32 addr)
2461 {
2462         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2463         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2464         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2465         char *block;
2466
2467         switch (mc_id) {
2468         case 32:
2469         case 16:
2470         case 96:
2471         case 80:
2472         case 160:
2473         case 144:
2474         case 224:
2475         case 208:
2476                 block = "CB";
2477                 break;
2478         case 33:
2479         case 17:
2480         case 97:
2481         case 81:
2482         case 161:
2483         case 145:
2484         case 225:
2485         case 209:
2486                 block = "CB_FMASK";
2487                 break;
2488         case 34:
2489         case 18:
2490         case 98:
2491         case 82:
2492         case 162:
2493         case 146:
2494         case 226:
2495         case 210:
2496                 block = "CB_CMASK";
2497                 break;
2498         case 35:
2499         case 19:
2500         case 99:
2501         case 83:
2502         case 163:
2503         case 147:
2504         case 227:
2505         case 211:
2506                 block = "CB_IMMED";
2507                 break;
2508         case 36:
2509         case 20:
2510         case 100:
2511         case 84:
2512         case 164:
2513         case 148:
2514         case 228:
2515         case 212:
2516                 block = "DB";
2517                 break;
2518         case 37:
2519         case 21:
2520         case 101:
2521         case 85:
2522         case 165:
2523         case 149:
2524         case 229:
2525         case 213:
2526                 block = "DB_HTILE";
2527                 break;
2528         case 38:
2529         case 22:
2530         case 102:
2531         case 86:
2532         case 166:
2533         case 150:
2534         case 230:
2535         case 214:
2536                 block = "SX";
2537                 break;
2538         case 39:
2539         case 23:
2540         case 103:
2541         case 87:
2542         case 167:
2543         case 151:
2544         case 231:
2545         case 215:
2546                 block = "DB_STEN";
2547                 break;
2548         case 40:
2549         case 24:
2550         case 104:
2551         case 88:
2552         case 232:
2553         case 216:
2554         case 168:
2555         case 152:
2556                 block = "TC_TFETCH";
2557                 break;
2558         case 41:
2559         case 25:
2560         case 105:
2561         case 89:
2562         case 233:
2563         case 217:
2564         case 169:
2565         case 153:
2566                 block = "TC_VFETCH";
2567                 break;
2568         case 42:
2569         case 26:
2570         case 106:
2571         case 90:
2572         case 234:
2573         case 218:
2574         case 170:
2575         case 154:
2576                 block = "VC";
2577                 break;
2578         case 112:
2579                 block = "CP";
2580                 break;
2581         case 113:
2582         case 114:
2583                 block = "SH";
2584                 break;
2585         case 115:
2586                 block = "VGT";
2587                 break;
2588         case 178:
2589                 block = "IH";
2590                 break;
2591         case 51:
2592                 block = "RLC";
2593                 break;
2594         case 55:
2595                 block = "DMA";
2596                 break;
2597         case 56:
2598                 block = "HDP";
2599                 break;
2600         default:
2601                 block = "unknown";
2602                 break;
2603         }
2604
2605         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2606                protections, vmid, addr,
2607                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2608                block, mc_id);
2609 }
2610
2611 #define R600_ENTRY_VALID   (1 << 0)
2612 #define R600_PTE_SYSTEM    (1 << 1)
2613 #define R600_PTE_SNOOPED   (1 << 2)
2614 #define R600_PTE_READABLE  (1 << 5)
2615 #define R600_PTE_WRITEABLE (1 << 6)
2616
2617 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2618 {
2619         uint32_t r600_flags = 0;
2620         r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2621         r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2622         r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2623         if (flags & RADEON_VM_PAGE_SYSTEM) {
2624                 r600_flags |= R600_PTE_SYSTEM;
2625                 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2626         }
2627         return r600_flags;
2628 }
2629
2630 /**
2631  * cayman_vm_set_page - update the page tables using the CP
2632  *
2633  * @rdev: radeon_device pointer
2634  * @ib: indirect buffer to fill with commands
2635  * @pe: addr of the page entry
2636  * @addr: dst addr to write into pe
2637  * @count: number of page entries to update
2638  * @incr: increase next addr by incr bytes
2639  * @flags: access flags
2640  *
2641  * Update the page tables using the CP (cayman/TN).
2642  */
2643 void cayman_vm_set_page(struct radeon_device *rdev,
2644                         struct radeon_ib *ib,
2645                         uint64_t pe,
2646                         uint64_t addr, unsigned count,
2647                         uint32_t incr, uint32_t flags)
2648 {
2649         uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2650         uint64_t value;
2651         unsigned ndw;
2652
2653         if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2654                 while (count) {
2655                         ndw = 1 + count * 2;
2656                         if (ndw > 0x3FFF)
2657                                 ndw = 0x3FFF;
2658
2659                         ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
2660                         ib->ptr[ib->length_dw++] = pe;
2661                         ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2662                         for (; ndw > 1; ndw -= 2, --count, pe += 8) {
2663                                 if (flags & RADEON_VM_PAGE_SYSTEM) {
2664                                         value = radeon_vm_map_gart(rdev, addr);
2665                                         value &= 0xFFFFFFFFFFFFF000ULL;
2666                                 } else if (flags & RADEON_VM_PAGE_VALID) {
2667                                         value = addr;
2668                                 } else {
2669                                         value = 0;
2670                                 }
2671                                 addr += incr;
2672                                 value |= r600_flags;
2673                                 ib->ptr[ib->length_dw++] = value;
2674                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2675                         }
2676                 }
2677         } else {
2678                 if ((flags & RADEON_VM_PAGE_SYSTEM) ||
2679                     (count == 1)) {
2680                         while (count) {
2681                                 ndw = count * 2;
2682                                 if (ndw > 0xFFFFE)
2683                                         ndw = 0xFFFFE;
2684
2685                                 /* for non-physically contiguous pages (system) */
2686                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
2687                                 ib->ptr[ib->length_dw++] = pe;
2688                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2689                                 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2690                                         if (flags & RADEON_VM_PAGE_SYSTEM) {
2691                                                 value = radeon_vm_map_gart(rdev, addr);
2692                                                 value &= 0xFFFFFFFFFFFFF000ULL;
2693                                         } else if (flags & RADEON_VM_PAGE_VALID) {
2694                                                 value = addr;
2695                                         } else {
2696                                                 value = 0;
2697                                         }
2698                                         addr += incr;
2699                                         value |= r600_flags;
2700                                         ib->ptr[ib->length_dw++] = value;
2701                                         ib->ptr[ib->length_dw++] = upper_32_bits(value);
2702                                 }
2703                         }
2704                         while (ib->length_dw & 0x7)
2705                                 ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2706                 } else {
2707                         while (count) {
2708                                 ndw = count * 2;
2709                                 if (ndw > 0xFFFFE)
2710                                         ndw = 0xFFFFE;
2711
2712                                 if (flags & RADEON_VM_PAGE_VALID)
2713                                         value = addr;
2714                                 else
2715                                         value = 0;
2716                                 /* for physically contiguous pages (vram) */
2717                                 ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
2718                                 ib->ptr[ib->length_dw++] = pe; /* dst addr */
2719                                 ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2720                                 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
2721                                 ib->ptr[ib->length_dw++] = 0;
2722                                 ib->ptr[ib->length_dw++] = value; /* value */
2723                                 ib->ptr[ib->length_dw++] = upper_32_bits(value);
2724                                 ib->ptr[ib->length_dw++] = incr; /* increment size */
2725                                 ib->ptr[ib->length_dw++] = 0;
2726                                 pe += ndw * 4;
2727                                 addr += (ndw / 2) * incr;
2728                                 count -= ndw / 2;
2729                         }
2730                 }
2731                 while (ib->length_dw & 0x7)
2732                         ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2733         }
2734 }
2735
2736 /**
2737  * cayman_vm_flush - vm flush using the CP
2738  *
2739  * @rdev: radeon_device pointer
2740  *
2741  * Update the page table base and flush the VM TLB
2742  * using the CP (cayman-si).
2743  */
2744 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2745 {
2746         struct radeon_ring *ring = &rdev->ring[ridx];
2747
2748         if (vm == NULL)
2749                 return;
2750
2751         radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
2752         radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2753
2754         /* flush hdp cache */
2755         radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
2756         radeon_ring_write(ring, 0x1);
2757
2758         /* bits 0-7 are the VM contexts0-7 */
2759         radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
2760         radeon_ring_write(ring, 1 << vm->id);
2761
2762         /* sync PFP to ME, otherwise we might get invalid PFP reads */
2763         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2764         radeon_ring_write(ring, 0x0);
2765 }
2766
/**
 * cayman_dma_vm_flush - vm flush using the async DMA engine
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the DMA ring to emit the flush on
 * @vm: VM to flush for; may be NULL, in which case nothing is emitted
 *
 * Update the page table base and flush the VM TLB using SRBM register
 * writes on the DMA ring (cayman/TN).
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point the VM context at the new page directory */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
2788