ARCv2: mm: micro-optimize region flush generated code
author Vineet Gupta <vgupta@synopsys.com>
Tue, 2 May 2017 23:23:57 +0000 (16:23 -0700)
committer Vineet Gupta <vgupta@synopsys.com>
Tue, 2 May 2017 23:40:29 +0000 (16:40 -0700)
DC_CTRL.RGN_OP is 3 bits wide; however, only 1 bit is used in the current
programming model (0: flush, 1: invalidate).

The current code, targeting all 3 bits, leads to an additional 8-byte AND
operation, which can be elided given that only 1 bit is ever set by
software and/or looked at by hardware.

before
------

80b63324 <__dma_cache_wback_inv_l1>:
80b63324: clri r3
80b63328: lr r2,[dc_ctrl]
80b6332c: and r2,r2,0xfffff1ff <--- 8 bytes insn
80b63334: or r2,r2,576
80b63338: sr r2,[dc_ctrl]
| ...
| ...
80b63360 <__dma_cache_inv_l1>:
80b63360: clri r3
80b63364: lr r2,[dc_ctrl]
80b63368: and r2,r2,0xfffff1ff <--- 8 bytes insn
80b63370: bset_s r2,r2,0x9
80b63372: sr r2,[dc_ctrl]
| ...
| ...
80b6338c <__dma_cache_wback_l1>:
80b6338c: clri r3
80b63390: lr r2,[dc_ctrl]
80b63394: and r2,r2,0xfffff1ff <--- 8 bytes insn
80b6339c: sr r2,[dc_ctrl]

after (AND elided totally in 2 cases, replaced with 2 byte BCLR in 3rd)
-----

80b63324 <__dma_cache_wback_inv_l1>:
80b63324: clri r3
80b63328: lr r2,[dc_ctrl]
80b6332c: or r2,r2,576
80b63330: sr r2,[dc_ctrl]
| ...
| ...
80b63358 <__dma_cache_inv_l1>:
80b63358: clri r3
80b6335c: lr r2,[dc_ctrl]
80b63360: bset_s r2,r2,0x9
80b63362: sr r2,[dc_ctrl]
| ...
| ...
80b6337c <__dma_cache_wback_l1>:
80b6337c: clri r3
80b63380: lr r2,[dc_ctrl]
80b63384: bclr_s r2,r2,0x9
80b63386: sr r2,[dc_ctrl]

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
arch/arc/include/asm/cache.h

index 16e457706129646607b8eb5d8ef96861c04d6e49..19ebddffb279db05ca8e53e2c4753665376cc9ad 100644 (file)
@@ -88,7 +88,7 @@ extern unsigned long perip_base, perip_end;
 #define DC_CTRL_INV_MODE_FLUSH 0x040
 #define DC_CTRL_FLUSH_STATUS   0x100
 #define DC_CTRL_RGN_OP_INV     0x200
-#define DC_CTRL_RGN_OP_MSK     0xE00
+#define DC_CTRL_RGN_OP_MSK     0x200
 
 /*System-level cache (L2 cache) related Auxiliary registers */
 #define ARC_REG_SLC_CFG                0x901