[ARM] 4584/2: ARMv7: Add Advanced SIMD (NEON) extension support
author Catalin Marinas <catalin.marinas@arm.com>
Thu, 10 Jan 2008 18:16:17 +0000 (19:16 +0100)
committer Russell King <rmk+kernel@arm.linux.org.uk>
Sat, 26 Jan 2008 14:44:02 +0000 (14:44 +0000)
This patch enables the use of the Advanced SIMD (NEON) extension on
ARMv7. The NEON technology is a 64/128-bit hybrid SIMD architecture
for accelerating the performance of multimedia and signal processing
applications. The extension shares the registers with the VFP unit and
enabling/disabling and saving/restoring follow the same rules. In
addition, some NEON instructions do not have the appropriate CP
number encoded; these are detected by checks in the call_fpe function.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
arch/arm/Kconfig
arch/arm/kernel/entry-armv.S

index f4eeb03bc6a9795e517255e2f2f1ce57e69fff2b..709f9d383c8705ebaa73323b65a02a316c484184 100644 (file)
@@ -966,6 +966,13 @@ config VFPv3
        depends on VFP
        default y if CPU_V7
 
+config NEON
+       bool "Advanced SIMD (NEON) Extension support"
+       depends on VFPv3 && CPU_V7
+       help
+         Say Y to include support code for NEON, the ARMv7 Advanced SIMD
+         Extension.
+
 endmenu
 
 menu "Userspace binary formats"
index 29dec080a60431dd88bfa7b732940449054374ad..8de21f51e48cc1f4988ba5ad2d5ac75240fab37a 100644 (file)
@@ -480,6 +480,13 @@ __und_usr:
  * co-processor instructions.  However, we have to watch out
  * for the ARM6/ARM7 SWI bug.
  *
+ * NEON is a special case that has to be handled here. Not all
+ * NEON instructions are co-processor instructions, so we have
+ * to make a special case of checking for them. Plus, there are
+ * five groups of them, so we have a table of mask/opcode pairs
+ * to check against, and if any match then we branch off into the
+ * NEON handler code.
+ *
  * Emulators may wish to make use of the following registers:
  *  r0  = instruction opcode.
  *  r2  = PC+4
@@ -488,6 +495,23 @@ __und_usr:
  *  lr  = unrecognised instruction return address
  */
 call_fpe:
+#ifdef CONFIG_NEON
+       adr     r6, .LCneon_opcodes
+2:
+       ldr     r7, [r6], #4                    @ mask value
+       cmp     r7, #0                          @ end mask?
+       beq     1f
+       and     r8, r0, r7
+       ldr     r7, [r6], #4                    @ opcode bits matching in mask
+       cmp     r8, r7                          @ NEON instruction?
+       bne     2b
+       get_thread_info r10
+       mov     r7, #1
+       strb    r7, [r10, #TI_USED_CP + 10]     @ mark CP#10 as used
+       strb    r7, [r10, #TI_USED_CP + 11]     @ mark CP#11 as used
+       b       do_vfp                          @ let VFP handler handle this
+1:
+#endif
        tst     r0, #0x08000000                 @ only CDP/CPRT/LDC/STC have bit 27
 #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
        and     r8, r0, #0x0f000000             @ mask out op-code bits
@@ -537,6 +561,20 @@ call_fpe:
        mov     pc, lr                          @ CP#14 (Debug)
        mov     pc, lr                          @ CP#15 (Control)
 
+#ifdef CONFIG_NEON
+       .align  6
+
+.LCneon_opcodes:
+       .word   0xfe000000                      @ mask: instruction bits [31:25]
+       .word   0xf2000000                      @ opcode: bits [31:25] = 1111 001
+
+       .word   0xff100000                      @ mask: bits [31:24] plus bit 20
+       .word   0xf4000000                      @ opcode: bits [31:24] = 1111 0100, bit 20 clear
+
+       .word   0x00000000                      @ mask: zero marks the end of the table
+       .word   0x00000000                      @ opcode: placeholder for the end entry
+#endif
+
 do_fpe:
        enable_irq
        ldr     r4, .LCfp