Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * arch/sh/mm/cache-sh4.c | |
3 | * | |
4 | * Copyright (C) 1999, 2000, 2002 Niibe Yutaka | |
d10040f7 | 5 | * Copyright (C) 2001 - 2007 Paul Mundt |
1da177e4 | 6 | * Copyright (C) 2003 Richard Curnow |
09b5a10c | 7 | * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. |
1da177e4 LT |
8 | * |
9 | * This file is subject to the terms and conditions of the GNU General Public | |
10 | * License. See the file "COPYING" in the main directory of this archive | |
11 | * for more details. | |
12 | */ | |
1da177e4 | 13 | #include <linux/init.h> |
1da177e4 | 14 | #include <linux/mm.h> |
52e27782 PM |
15 | #include <linux/io.h> |
16 | #include <linux/mutex.h> | |
1da177e4 LT |
17 | #include <asm/mmu_context.h> |
18 | #include <asm/cacheflush.h> | |
19 | ||
28ccf7f9 PM |
/*
 * Upper bounds, in pages, for ranged cache maintenance.  A request that
 * reaches one of these limits is not walked page by page; the callers in
 * this file fall back to flushing the cache in its entirety instead.
 */
#define MAX_DCACHE_PAGES	64	/* XXX: Tune for ways */
#define MAX_ICACHE_PAGES	32
28ccf7f9 | 27 | |
b638d0b9 RC |
/* D-cache segment flushers, one per supported associativity. */
static void __flush_dcache_segment_1way(unsigned long start,
					unsigned long extent);
static void __flush_dcache_segment_2way(unsigned long start,
					unsigned long extent);
static void __flush_dcache_segment_4way(unsigned long start,
					unsigned long extent);

/* Worker behind flush_cache_4096(). */
static void __flush_cache_4096(unsigned long addr, unsigned long phys,
			       unsigned long exec_offset);

/*
 * Flusher selected at init time according to the number of D-cache ways.
 *
 * The explicit non-zero initialiser keeps this pointer out of the BSS:
 * cache_init runs before the BSS is cleared, so a BSS-resident pointer
 * would be wiped back to NULL after it had been set up.
 */
static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
	(void (*)(unsigned long, unsigned long))0xdeadbeef;
45 | ||
46 | static void compute_alias(struct cache_info *c) | |
47 | { | |
48 | c->alias_mask = ((c->sets - 1) << c->entry_shift) & ~(PAGE_SIZE - 1); | |
d10040f7 | 49 | c->n_aliases = c->alias_mask ? (c->alias_mask >> PAGE_SHIFT) + 1 : 0; |
b638d0b9 RC |
50 | } |
51 | ||
52 | static void __init emit_cache_params(void) | |
53 | { | |
54 | printk("PVR=%08x CVR=%08x PRR=%08x\n", | |
55 | ctrl_inl(CCN_PVR), | |
56 | ctrl_inl(CCN_CVR), | |
57 | ctrl_inl(CCN_PRR)); | |
58 | printk("I-cache : n_ways=%d n_sets=%d way_incr=%d\n", | |
7ec9d6f8 PM |
59 | boot_cpu_data.icache.ways, |
60 | boot_cpu_data.icache.sets, | |
61 | boot_cpu_data.icache.way_incr); | |
b638d0b9 | 62 | printk("I-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", |
7ec9d6f8 PM |
63 | boot_cpu_data.icache.entry_mask, |
64 | boot_cpu_data.icache.alias_mask, | |
65 | boot_cpu_data.icache.n_aliases); | |
b638d0b9 | 66 | printk("D-cache : n_ways=%d n_sets=%d way_incr=%d\n", |
7ec9d6f8 PM |
67 | boot_cpu_data.dcache.ways, |
68 | boot_cpu_data.dcache.sets, | |
69 | boot_cpu_data.dcache.way_incr); | |
b638d0b9 | 70 | printk("D-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", |
7ec9d6f8 PM |
71 | boot_cpu_data.dcache.entry_mask, |
72 | boot_cpu_data.dcache.alias_mask, | |
73 | boot_cpu_data.dcache.n_aliases); | |
b638d0b9 | 74 | |
ab27f620 PM |
75 | /* |
76 | * Emit Secondary Cache parameters if the CPU has a probed L2. | |
77 | */ | |
78 | if (boot_cpu_data.flags & CPU_HAS_L2_CACHE) { | |
79 | printk("S-cache : n_ways=%d n_sets=%d way_incr=%d\n", | |
80 | boot_cpu_data.scache.ways, | |
81 | boot_cpu_data.scache.sets, | |
82 | boot_cpu_data.scache.way_incr); | |
83 | printk("S-cache : entry_mask=0x%08x alias_mask=0x%08x n_aliases=%d\n", | |
84 | boot_cpu_data.scache.entry_mask, | |
85 | boot_cpu_data.scache.alias_mask, | |
86 | boot_cpu_data.scache.n_aliases); | |
87 | } | |
88 | ||
b638d0b9 RC |
89 | if (!__flush_dcache_segment_fn) |
90 | panic("unknown number of cache ways\n"); | |
91 | } | |
1da177e4 LT |
92 | |
93 | /* | |
94 | * SH-4 has virtually indexed and physically tagged cache. | |
95 | */ | |
1da177e4 LT |
96 | void __init p3_cache_init(void) |
97 | { | |
7ec9d6f8 PM |
98 | compute_alias(&boot_cpu_data.icache); |
99 | compute_alias(&boot_cpu_data.dcache); | |
ab27f620 | 100 | compute_alias(&boot_cpu_data.scache); |
b638d0b9 | 101 | |
7ec9d6f8 | 102 | switch (boot_cpu_data.dcache.ways) { |
b638d0b9 RC |
103 | case 1: |
104 | __flush_dcache_segment_fn = __flush_dcache_segment_1way; | |
105 | break; | |
106 | case 2: | |
107 | __flush_dcache_segment_fn = __flush_dcache_segment_2way; | |
108 | break; | |
109 | case 4: | |
110 | __flush_dcache_segment_fn = __flush_dcache_segment_4way; | |
111 | break; | |
112 | default: | |
113 | __flush_dcache_segment_fn = NULL; | |
114 | break; | |
115 | } | |
116 | ||
117 | emit_cache_params(); | |
1da177e4 LT |
118 | } |
119 | ||
120 | /* | |
121 | * Write back the dirty D-caches, but not invalidate them. | |
122 | * | |
123 | * START: Virtual Address (U0, P1, or P3) | |
124 | * SIZE: Size of the region. | |
125 | */ | |
126 | void __flush_wback_region(void *start, int size) | |
127 | { | |
128 | unsigned long v; | |
129 | unsigned long begin, end; | |
130 | ||
131 | begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); | |
132 | end = ((unsigned long)start + size + L1_CACHE_BYTES-1) | |
133 | & ~(L1_CACHE_BYTES-1); | |
134 | for (v = begin; v < end; v+=L1_CACHE_BYTES) { | |
135 | asm volatile("ocbwb %0" | |
136 | : /* no output */ | |
137 | : "m" (__m(v))); | |
138 | } | |
139 | } | |
140 | ||
141 | /* | |
142 | * Write back the dirty D-caches and invalidate them. | |
143 | * | |
144 | * START: Virtual Address (U0, P1, or P3) | |
145 | * SIZE: Size of the region. | |
146 | */ | |
147 | void __flush_purge_region(void *start, int size) | |
148 | { | |
149 | unsigned long v; | |
150 | unsigned long begin, end; | |
151 | ||
152 | begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); | |
153 | end = ((unsigned long)start + size + L1_CACHE_BYTES-1) | |
154 | & ~(L1_CACHE_BYTES-1); | |
155 | for (v = begin; v < end; v+=L1_CACHE_BYTES) { | |
156 | asm volatile("ocbp %0" | |
157 | : /* no output */ | |
158 | : "m" (__m(v))); | |
159 | } | |
160 | } | |
161 | ||
1da177e4 LT |
162 | /* |
163 | * No write back please | |
164 | */ | |
165 | void __flush_invalidate_region(void *start, int size) | |
166 | { | |
167 | unsigned long v; | |
168 | unsigned long begin, end; | |
169 | ||
170 | begin = (unsigned long)start & ~(L1_CACHE_BYTES-1); | |
171 | end = ((unsigned long)start + size + L1_CACHE_BYTES-1) | |
172 | & ~(L1_CACHE_BYTES-1); | |
173 | for (v = begin; v < end; v+=L1_CACHE_BYTES) { | |
174 | asm volatile("ocbi %0" | |
175 | : /* no output */ | |
176 | : "m" (__m(v))); | |
177 | } | |
178 | } | |
179 | ||
1da177e4 LT |
180 | /* |
181 | * Write back the range of D-cache, and purge the I-cache. | |
182 | * | |
09b5a10c CS |
183 | * Called from kernel/module.c:sys_init_module and routine for a.out format, |
184 | * signal handler code and kprobes code | |
1da177e4 LT |
185 | */ |
186 | void flush_icache_range(unsigned long start, unsigned long end) | |
187 | { | |
09b5a10c CS |
188 | int icacheaddr; |
189 | unsigned long flags, v; | |
1da177e4 LT |
190 | int i; |
191 | ||
09b5a10c CS |
192 | /* If there are too many pages then just blow the caches */ |
193 | if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { | |
194 | flush_cache_all(); | |
195 | } else { | |
196 | /* selectively flush d-cache then invalidate the i-cache */ | |
197 | /* this is inefficient, so only use for small ranges */ | |
198 | start &= ~(L1_CACHE_BYTES-1); | |
199 | end += L1_CACHE_BYTES-1; | |
200 | end &= ~(L1_CACHE_BYTES-1); | |
1da177e4 | 201 | |
09b5a10c CS |
202 | local_irq_save(flags); |
203 | jump_to_uncached(); | |
1da177e4 | 204 | |
09b5a10c CS |
205 | for (v = start; v < end; v+=L1_CACHE_BYTES) { |
206 | asm volatile("ocbwb %0" | |
207 | : /* no output */ | |
208 | : "m" (__m(v))); | |
b638d0b9 | 209 | |
09b5a10c CS |
210 | icacheaddr = CACHE_IC_ADDRESS_ARRAY | ( |
211 | v & cpu_data->icache.entry_mask); | |
b638d0b9 | 212 | |
09b5a10c CS |
213 | for (i = 0; i < cpu_data->icache.ways; |
214 | i++, icacheaddr += cpu_data->icache.way_incr) | |
215 | /* Clear i-cache line valid-bit */ | |
216 | ctrl_outl(0, icacheaddr); | |
217 | } | |
218 | ||
219 | back_to_cached(); | |
220 | local_irq_restore(flags); | |
221 | } | |
1da177e4 LT |
222 | } |
223 | ||
224 | static inline void flush_cache_4096(unsigned long start, | |
225 | unsigned long phys) | |
226 | { | |
33573c0e PM |
227 | unsigned long flags, exec_offset = 0; |
228 | ||
1da177e4 | 229 | /* |
b638d0b9 RC |
230 | * All types of SH-4 require PC to be in P2 to operate on the I-cache. |
231 | * Some types of SH-4 require PC to be in P2 to operate on the D-cache. | |
1da177e4 | 232 | */ |
7ec9d6f8 | 233 | if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) || |
33573c0e | 234 | (start < CACHE_OC_ADDRESS_ARRAY)) |
510c72ad | 235 | exec_offset = 0x20000000; |
33573c0e PM |
236 | |
237 | local_irq_save(flags); | |
238 | __flush_cache_4096(start | SH_CACHE_ASSOC, | |
239 | P1SEGADDR(phys), exec_offset); | |
240 | local_irq_restore(flags); | |
1da177e4 LT |
241 | } |
242 | ||
243 | /* | |
244 | * Write back & invalidate the D-cache of the page. | |
245 | * (To avoid "alias" issues) | |
246 | */ | |
247 | void flush_dcache_page(struct page *page) | |
248 | { | |
39e688a9 | 249 | if (test_bit(PG_mapped, &page->flags)) { |
1da177e4 | 250 | unsigned long phys = PHYSADDR(page_address(page)); |
b638d0b9 RC |
251 | unsigned long addr = CACHE_OC_ADDRESS_ARRAY; |
252 | int i, n; | |
1da177e4 LT |
253 | |
254 | /* Loop all the D-cache */ | |
7ec9d6f8 | 255 | n = boot_cpu_data.dcache.n_aliases; |
510c72ad | 256 | for (i = 0; i < n; i++, addr += 4096) |
b638d0b9 | 257 | flush_cache_4096(addr, phys); |
1da177e4 | 258 | } |
fdfc74f9 PM |
259 | |
260 | wmb(); | |
1da177e4 LT |
261 | } |
262 | ||
28ccf7f9 | 263 | /* TODO: Selective icache invalidation through IC address array.. */ |
205a3b43 | 264 | static void __uses_jump_to_uncached flush_icache_all(void) |
1da177e4 LT |
265 | { |
266 | unsigned long flags, ccr; | |
267 | ||
268 | local_irq_save(flags); | |
cbaa118e | 269 | jump_to_uncached(); |
1da177e4 LT |
270 | |
271 | /* Flush I-cache */ | |
272 | ccr = ctrl_inl(CCR); | |
273 | ccr |= CCR_CACHE_ICI; | |
274 | ctrl_outl(ccr, CCR); | |
275 | ||
29847622 | 276 | /* |
cbaa118e | 277 | * back_to_cached() will take care of the barrier for us, don't add |
29847622 PM |
278 | * another one! |
279 | */ | |
280 | ||
cbaa118e | 281 | back_to_cached(); |
1da177e4 LT |
282 | local_irq_restore(flags); |
283 | } | |
284 | ||
a252710f | 285 | void flush_dcache_all(void) |
1da177e4 | 286 | { |
7ec9d6f8 | 287 | (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size); |
fdfc74f9 | 288 | wmb(); |
a252710f PM |
289 | } |
290 | ||
/* Flush the whole D-cache first, then invalidate the whole I-cache. */
void flush_cache_all(void)
{
	flush_dcache_all();
	flush_icache_all();
}
296 | ||
28ccf7f9 PM |
297 | static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, |
298 | unsigned long end) | |
299 | { | |
300 | unsigned long d = 0, p = start & PAGE_MASK; | |
7ec9d6f8 PM |
301 | unsigned long alias_mask = boot_cpu_data.dcache.alias_mask; |
302 | unsigned long n_aliases = boot_cpu_data.dcache.n_aliases; | |
28ccf7f9 PM |
303 | unsigned long select_bit; |
304 | unsigned long all_aliases_mask; | |
305 | unsigned long addr_offset; | |
306 | pgd_t *dir; | |
307 | pmd_t *pmd; | |
308 | pud_t *pud; | |
309 | pte_t *pte; | |
310 | int i; | |
311 | ||
312 | dir = pgd_offset(mm, p); | |
313 | pud = pud_offset(dir, p); | |
314 | pmd = pmd_offset(pud, p); | |
315 | end = PAGE_ALIGN(end); | |
316 | ||
317 | all_aliases_mask = (1 << n_aliases) - 1; | |
318 | ||
319 | do { | |
320 | if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { | |
321 | p &= PMD_MASK; | |
322 | p += PMD_SIZE; | |
323 | pmd++; | |
324 | ||
325 | continue; | |
326 | } | |
327 | ||
328 | pte = pte_offset_kernel(pmd, p); | |
329 | ||
330 | do { | |
331 | unsigned long phys; | |
332 | pte_t entry = *pte; | |
333 | ||
334 | if (!(pte_val(entry) & _PAGE_PRESENT)) { | |
335 | pte++; | |
336 | p += PAGE_SIZE; | |
337 | continue; | |
338 | } | |
339 | ||
340 | phys = pte_val(entry) & PTE_PHYS_MASK; | |
341 | ||
342 | if ((p ^ phys) & alias_mask) { | |
343 | d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); | |
344 | d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); | |
345 | ||
346 | if (d == all_aliases_mask) | |
347 | goto loop_exit; | |
348 | } | |
349 | ||
350 | pte++; | |
351 | p += PAGE_SIZE; | |
352 | } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); | |
353 | pmd++; | |
354 | } while (p < end); | |
355 | ||
356 | loop_exit: | |
357 | addr_offset = 0; | |
358 | select_bit = 1; | |
359 | ||
360 | for (i = 0; i < n_aliases; i++) { | |
361 | if (d & select_bit) { | |
362 | (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); | |
363 | wmb(); | |
364 | } | |
365 | ||
366 | select_bit <<= 1; | |
367 | addr_offset += PAGE_SIZE; | |
368 | } | |
369 | } | |
370 | ||
371 | /* | |
372 | * Note : (RPC) since the caches are physically tagged, the only point | |
373 | * of flush_cache_mm for SH-4 is to get rid of aliases from the | |
374 | * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that | |
375 | * lines can stay resident so long as the virtual address they were | |
376 | * accessed with (hence cache set) is in accord with the physical | |
377 | * address (i.e. tag). It's no different here. So I reckon we don't | |
378 | * need to flush the I-cache, since aliases don't matter for that. We | |
379 | * should try that. | |
380 | * | |
381 | * Caller takes mm->mmap_sem. | |
382 | */ | |
1da177e4 LT |
383 | void flush_cache_mm(struct mm_struct *mm) |
384 | { | |
b638d0b9 | 385 | /* |
28ccf7f9 PM |
386 | * If cache is only 4k-per-way, there are never any 'aliases'. Since |
387 | * the cache is physically tagged, the data can just be left in there. | |
388 | */ | |
7ec9d6f8 | 389 | if (boot_cpu_data.dcache.n_aliases == 0) |
28ccf7f9 PM |
390 | return; |
391 | ||
392 | /* | |
393 | * Don't bother groveling around the dcache for the VMA ranges | |
394 | * if there are too many PTEs to make it worthwhile. | |
b638d0b9 | 395 | */ |
28ccf7f9 PM |
396 | if (mm->nr_ptes >= MAX_DCACHE_PAGES) |
397 | flush_dcache_all(); | |
398 | else { | |
399 | struct vm_area_struct *vma; | |
400 | ||
401 | /* | |
402 | * In this case there are reasonably sized ranges to flush, | |
403 | * iterate through the VMA list and take care of any aliases. | |
404 | */ | |
405 | for (vma = mm->mmap; vma; vma = vma->vm_next) | |
406 | __flush_cache_mm(mm, vma->vm_start, vma->vm_end); | |
407 | } | |
408 | ||
409 | /* Only touch the icache if one of the VMAs has VM_EXEC set. */ | |
410 | if (mm->exec_vm) | |
411 | flush_icache_all(); | |
1da177e4 LT |
412 | } |
413 | ||
414 | /* | |
415 | * Write back and invalidate I/D-caches for the page. | |
416 | * | |
417 | * ADDR: Virtual Address (U0 address) | |
418 | * PFN: Physical page number | |
419 | */ | |
28ccf7f9 PM |
420 | void flush_cache_page(struct vm_area_struct *vma, unsigned long address, |
421 | unsigned long pfn) | |
1da177e4 LT |
422 | { |
423 | unsigned long phys = pfn << PAGE_SHIFT; | |
b638d0b9 RC |
424 | unsigned int alias_mask; |
425 | ||
7ec9d6f8 | 426 | alias_mask = boot_cpu_data.dcache.alias_mask; |
1da177e4 LT |
427 | |
428 | /* We only need to flush D-cache when we have alias */ | |
b638d0b9 | 429 | if ((address^phys) & alias_mask) { |
1da177e4 LT |
430 | /* Loop 4K of the D-cache */ |
431 | flush_cache_4096( | |
b638d0b9 | 432 | CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), |
1da177e4 LT |
433 | phys); |
434 | /* Loop another 4K of the D-cache */ | |
435 | flush_cache_4096( | |
b638d0b9 | 436 | CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), |
1da177e4 LT |
437 | phys); |
438 | } | |
439 | ||
7ec9d6f8 | 440 | alias_mask = boot_cpu_data.icache.alias_mask; |
b638d0b9 RC |
441 | if (vma->vm_flags & VM_EXEC) { |
442 | /* | |
443 | * Evict entries from the portion of the cache from which code | |
444 | * may have been executed at this address (virtual). There's | |
445 | * no need to evict from the portion corresponding to the | |
446 | * physical address as for the D-cache, because we know the | |
447 | * kernel has never executed the code through its identity | |
448 | * translation. | |
449 | */ | |
1da177e4 | 450 | flush_cache_4096( |
b638d0b9 | 451 | CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), |
1da177e4 | 452 | phys); |
b638d0b9 | 453 | } |
1da177e4 LT |
454 | } |
455 | ||
456 | /* | |
457 | * Write back and invalidate D-caches. | |
458 | * | |
459 | * START, END: Virtual Address (U0 address) | |
460 | * | |
461 | * NOTE: We need to flush the _physical_ page entry. | |
462 | * Flushing the cache lines for U0 only isn't enough. | |
463 | * We need to flush for P1 too, which may contain aliases. | |
464 | */ | |
465 | void flush_cache_range(struct vm_area_struct *vma, unsigned long start, | |
466 | unsigned long end) | |
467 | { | |
b638d0b9 RC |
468 | /* |
469 | * If cache is only 4k-per-way, there are never any 'aliases'. Since | |
470 | * the cache is physically tagged, the data can just be left in there. | |
471 | */ | |
7ec9d6f8 | 472 | if (boot_cpu_data.dcache.n_aliases == 0) |
b638d0b9 RC |
473 | return; |
474 | ||
a252710f PM |
475 | /* |
476 | * Don't bother with the lookup and alias check if we have a | |
477 | * wide range to cover, just blow away the dcache in its | |
478 | * entirety instead. -- PFM. | |
479 | */ | |
28ccf7f9 | 480 | if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) |
a252710f | 481 | flush_dcache_all(); |
28ccf7f9 PM |
482 | else |
483 | __flush_cache_mm(vma->vm_mm, start, end); | |
b638d0b9 RC |
484 | |
485 | if (vma->vm_flags & VM_EXEC) { | |
486 | /* | |
487 | * TODO: Is this required??? Need to look at how I-cache | |
488 | * coherency is assured when new programs are loaded to see if | |
489 | * this matters. | |
490 | */ | |
1da177e4 | 491 | flush_icache_all(); |
b638d0b9 | 492 | } |
1da177e4 LT |
493 | } |
494 | ||
/*
 * flush_icache_user_range
 * @vma: VMA of the process
 * @page: page
 * @addr: U0 address
 * @len: length of the range (< page size)
 *
 * @len is not consulted: the whole page is flushed via flush_cache_page()
 * and a full memory barrier follows.
 */
void flush_icache_user_range(struct vm_area_struct *vma,
			     struct page *page, unsigned long addr, int len)
{
	unsigned long pfn = page_to_pfn(page);

	flush_cache_page(vma, addr, pfn);
	mb();
}
508 | ||
b638d0b9 RC |
509 | /** |
510 | * __flush_cache_4096 | |
511 | * | |
512 | * @addr: address in memory mapped cache array | |
513 | * @phys: P1 address to flush (has to match tags if addr has 'A' bit | |
514 | * set i.e. associative write) | |
515 | * @exec_offset: set to 0x20000000 if flush has to be executed from P2 | |
516 | * region else 0x0 | |
517 | * | |
518 | * The offset into the cache array implied by 'addr' selects the | |
519 | * 'colour' of the virtual address range that will be flushed. The | |
520 | * operation (purge/write-back) is selected by the lower 2 bits of | |
521 | * 'phys'. | |
522 | */ | |
523 | static void __flush_cache_4096(unsigned long addr, unsigned long phys, | |
524 | unsigned long exec_offset) | |
525 | { | |
526 | int way_count; | |
527 | unsigned long base_addr = addr; | |
528 | struct cache_info *dcache; | |
529 | unsigned long way_incr; | |
530 | unsigned long a, ea, p; | |
531 | unsigned long temp_pc; | |
532 | ||
7ec9d6f8 | 533 | dcache = &boot_cpu_data.dcache; |
b638d0b9 RC |
534 | /* Write this way for better assembly. */ |
535 | way_count = dcache->ways; | |
536 | way_incr = dcache->way_incr; | |
537 | ||
538 | /* | |
539 | * Apply exec_offset (i.e. branch to P2 if required.). | |
540 | * | |
541 | * FIXME: | |
542 | * | |
543 | * If I write "=r" for the (temp_pc), it puts this in r6 hence | |
544 | * trashing exec_offset before it's been added on - why? Hence | |
545 | * "=&r" as a 'workaround' | |
546 | */ | |
547 | asm volatile("mov.l 1f, %0\n\t" | |
548 | "add %1, %0\n\t" | |
549 | "jmp @%0\n\t" | |
550 | "nop\n\t" | |
551 | ".balign 4\n\t" | |
552 | "1: .long 2f\n\t" | |
553 | "2:\n" : "=&r" (temp_pc) : "r" (exec_offset)); | |
554 | ||
555 | /* | |
556 | * We know there will be >=1 iteration, so write as do-while to avoid | |
557 | * pointless nead-of-loop check for 0 iterations. | |
558 | */ | |
559 | do { | |
560 | ea = base_addr + PAGE_SIZE; | |
561 | a = base_addr; | |
562 | p = phys; | |
563 | ||
564 | do { | |
565 | *(volatile unsigned long *)a = p; | |
566 | /* | |
567 | * Next line: intentionally not p+32, saves an add, p | |
568 | * will do since only the cache tag bits need to | |
569 | * match. | |
570 | */ | |
571 | *(volatile unsigned long *)(a+32) = p; | |
572 | a += 64; | |
573 | p += 64; | |
574 | } while (a < ea); | |
575 | ||
576 | base_addr += way_incr; | |
577 | } while (--way_count != 0); | |
578 | } | |
579 | ||
580 | /* | |
581 | * Break the 1, 2 and 4 way variants of this out into separate functions to | |
582 | * avoid nearly all the overhead of having the conditional stuff in the function | |
583 | * bodies (+ the 1 and 2 way cases avoid saving any registers too). | |
584 | */ | |
585 | static void __flush_dcache_segment_1way(unsigned long start, | |
586 | unsigned long extent_per_way) | |
587 | { | |
588 | unsigned long orig_sr, sr_with_bl; | |
589 | unsigned long base_addr; | |
590 | unsigned long way_incr, linesz, way_size; | |
591 | struct cache_info *dcache; | |
592 | register unsigned long a0, a0e; | |
593 | ||
594 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | |
595 | sr_with_bl = orig_sr | (1<<28); | |
596 | base_addr = ((unsigned long)&empty_zero_page[0]); | |
597 | ||
598 | /* | |
599 | * The previous code aligned base_addr to 16k, i.e. the way_size of all | |
600 | * existing SH-4 D-caches. Whilst I don't see a need to have this | |
601 | * aligned to any better than the cache line size (which it will be | |
602 | * anyway by construction), let's align it to at least the way_size of | |
603 | * any existing or conceivable SH-4 D-cache. -- RPC | |
604 | */ | |
605 | base_addr = ((base_addr >> 16) << 16); | |
606 | base_addr |= start; | |
607 | ||
7ec9d6f8 | 608 | dcache = &boot_cpu_data.dcache; |
b638d0b9 RC |
609 | linesz = dcache->linesz; |
610 | way_incr = dcache->way_incr; | |
611 | way_size = dcache->way_size; | |
612 | ||
613 | a0 = base_addr; | |
614 | a0e = base_addr + extent_per_way; | |
615 | do { | |
616 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | |
617 | asm volatile("movca.l r0, @%0\n\t" | |
618 | "ocbi @%0" : : "r" (a0)); | |
619 | a0 += linesz; | |
620 | asm volatile("movca.l r0, @%0\n\t" | |
621 | "ocbi @%0" : : "r" (a0)); | |
622 | a0 += linesz; | |
623 | asm volatile("movca.l r0, @%0\n\t" | |
624 | "ocbi @%0" : : "r" (a0)); | |
625 | a0 += linesz; | |
626 | asm volatile("movca.l r0, @%0\n\t" | |
627 | "ocbi @%0" : : "r" (a0)); | |
628 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | |
629 | a0 += linesz; | |
630 | } while (a0 < a0e); | |
631 | } | |
632 | ||
633 | static void __flush_dcache_segment_2way(unsigned long start, | |
634 | unsigned long extent_per_way) | |
635 | { | |
636 | unsigned long orig_sr, sr_with_bl; | |
637 | unsigned long base_addr; | |
638 | unsigned long way_incr, linesz, way_size; | |
639 | struct cache_info *dcache; | |
640 | register unsigned long a0, a1, a0e; | |
641 | ||
642 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | |
643 | sr_with_bl = orig_sr | (1<<28); | |
644 | base_addr = ((unsigned long)&empty_zero_page[0]); | |
645 | ||
646 | /* See comment under 1-way above */ | |
647 | base_addr = ((base_addr >> 16) << 16); | |
648 | base_addr |= start; | |
649 | ||
7ec9d6f8 | 650 | dcache = &boot_cpu_data.dcache; |
b638d0b9 RC |
651 | linesz = dcache->linesz; |
652 | way_incr = dcache->way_incr; | |
653 | way_size = dcache->way_size; | |
654 | ||
655 | a0 = base_addr; | |
656 | a1 = a0 + way_incr; | |
657 | a0e = base_addr + extent_per_way; | |
658 | do { | |
659 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | |
660 | asm volatile("movca.l r0, @%0\n\t" | |
661 | "movca.l r0, @%1\n\t" | |
662 | "ocbi @%0\n\t" | |
663 | "ocbi @%1" : : | |
664 | "r" (a0), "r" (a1)); | |
665 | a0 += linesz; | |
666 | a1 += linesz; | |
667 | asm volatile("movca.l r0, @%0\n\t" | |
668 | "movca.l r0, @%1\n\t" | |
669 | "ocbi @%0\n\t" | |
670 | "ocbi @%1" : : | |
671 | "r" (a0), "r" (a1)); | |
672 | a0 += linesz; | |
673 | a1 += linesz; | |
674 | asm volatile("movca.l r0, @%0\n\t" | |
675 | "movca.l r0, @%1\n\t" | |
676 | "ocbi @%0\n\t" | |
677 | "ocbi @%1" : : | |
678 | "r" (a0), "r" (a1)); | |
679 | a0 += linesz; | |
680 | a1 += linesz; | |
681 | asm volatile("movca.l r0, @%0\n\t" | |
682 | "movca.l r0, @%1\n\t" | |
683 | "ocbi @%0\n\t" | |
684 | "ocbi @%1" : : | |
685 | "r" (a0), "r" (a1)); | |
686 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | |
687 | a0 += linesz; | |
688 | a1 += linesz; | |
689 | } while (a0 < a0e); | |
690 | } | |
691 | ||
692 | static void __flush_dcache_segment_4way(unsigned long start, | |
693 | unsigned long extent_per_way) | |
694 | { | |
695 | unsigned long orig_sr, sr_with_bl; | |
696 | unsigned long base_addr; | |
697 | unsigned long way_incr, linesz, way_size; | |
698 | struct cache_info *dcache; | |
699 | register unsigned long a0, a1, a2, a3, a0e; | |
700 | ||
701 | asm volatile("stc sr, %0" : "=r" (orig_sr)); | |
702 | sr_with_bl = orig_sr | (1<<28); | |
703 | base_addr = ((unsigned long)&empty_zero_page[0]); | |
704 | ||
705 | /* See comment under 1-way above */ | |
706 | base_addr = ((base_addr >> 16) << 16); | |
707 | base_addr |= start; | |
708 | ||
7ec9d6f8 | 709 | dcache = &boot_cpu_data.dcache; |
b638d0b9 RC |
710 | linesz = dcache->linesz; |
711 | way_incr = dcache->way_incr; | |
712 | way_size = dcache->way_size; | |
713 | ||
714 | a0 = base_addr; | |
715 | a1 = a0 + way_incr; | |
716 | a2 = a1 + way_incr; | |
717 | a3 = a2 + way_incr; | |
718 | a0e = base_addr + extent_per_way; | |
719 | do { | |
720 | asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); | |
721 | asm volatile("movca.l r0, @%0\n\t" | |
722 | "movca.l r0, @%1\n\t" | |
723 | "movca.l r0, @%2\n\t" | |
724 | "movca.l r0, @%3\n\t" | |
725 | "ocbi @%0\n\t" | |
726 | "ocbi @%1\n\t" | |
727 | "ocbi @%2\n\t" | |
728 | "ocbi @%3\n\t" : : | |
729 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | |
730 | a0 += linesz; | |
731 | a1 += linesz; | |
732 | a2 += linesz; | |
733 | a3 += linesz; | |
734 | asm volatile("movca.l r0, @%0\n\t" | |
735 | "movca.l r0, @%1\n\t" | |
736 | "movca.l r0, @%2\n\t" | |
737 | "movca.l r0, @%3\n\t" | |
738 | "ocbi @%0\n\t" | |
739 | "ocbi @%1\n\t" | |
740 | "ocbi @%2\n\t" | |
741 | "ocbi @%3\n\t" : : | |
742 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | |
743 | a0 += linesz; | |
744 | a1 += linesz; | |
745 | a2 += linesz; | |
746 | a3 += linesz; | |
747 | asm volatile("movca.l r0, @%0\n\t" | |
748 | "movca.l r0, @%1\n\t" | |
749 | "movca.l r0, @%2\n\t" | |
750 | "movca.l r0, @%3\n\t" | |
751 | "ocbi @%0\n\t" | |
752 | "ocbi @%1\n\t" | |
753 | "ocbi @%2\n\t" | |
754 | "ocbi @%3\n\t" : : | |
755 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | |
756 | a0 += linesz; | |
757 | a1 += linesz; | |
758 | a2 += linesz; | |
759 | a3 += linesz; | |
760 | asm volatile("movca.l r0, @%0\n\t" | |
761 | "movca.l r0, @%1\n\t" | |
762 | "movca.l r0, @%2\n\t" | |
763 | "movca.l r0, @%3\n\t" | |
764 | "ocbi @%0\n\t" | |
765 | "ocbi @%1\n\t" | |
766 | "ocbi @%2\n\t" | |
767 | "ocbi @%3\n\t" : : | |
768 | "r" (a0), "r" (a1), "r" (a2), "r" (a3)); | |
769 | asm volatile("ldc %0, sr" : : "r" (orig_sr)); | |
770 | a0 += linesz; | |
771 | a1 += linesz; | |
772 | a2 += linesz; | |
773 | a3 += linesz; | |
774 | } while (a0 < a0e); | |
775 | } |