Commit | Line | Data |
---|---|---|
1f7e3dc0 CZ |
1 | /* |
2 | * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License version 2 as | |
6 | * published by the Free Software Foundation. | |
7 | */ | |
8 | ||
9 | #include <linux/linkage.h> | |
10 | ||
/*
 * Endian-dependent helpers used by the unaligned-source paths of memcpy.
 *
 *   SHIFT_1 / SHIFT_2     - the two opposite shifts applied to each freshly
 *                           loaded word: asl then lsr on little-endian,
 *                           lsr then asl on big-endian.
 *   MERGE_1 / MERGE_2     - position the initial halfword / byte before they
 *                           are or-ed together into the carry register.
 *                           MERGE_2 expands to nothing on little-endian.
 *   EXTRACT_1 / EXTRACT_2 - pull the trailing halfword / byte back out of
 *                           the carry register.  Little-endian EXTRACT_1
 *                           ignores IMM (fixed 0xFFFF mask); big-endian
 *                           EXTRACT_2 ignores IMM (fixed shift of 8).
 */
#if defined(__LITTLE_ENDIAN__)
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else	/* big-endian */
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif	/* __LITTLE_ENDIAN__ */
26 | ||
/*
 * Width-dependent helpers for the aligned bulk-copy loop of memcpy.
 *
 *   LOADX / STOREX    - one load / store with address write-back (.ab):
 *                       ldd/std stepping by 8 bytes when
 *                       CONFIG_ARC_HAS_LL64 is set, ld/st stepping by 4
 *                       bytes otherwise.
 *   ZOLSHFT / ZOLAND  - shift and mask that split the byte count into
 *                       bulk-loop iterations and a byte tail.  The bulk loop
 *                       issues four LOADX/STOREX pairs per iteration, i.e.
 *                       32 bytes (shift 5, mask 0x1F) with LL64 and
 *                       16 bytes (shift 4, mask 0xF) without.
 *   PREFETCH_READ / PREFETCH_WRITE
 *                     - prefetch hints at a fixed offset from RX
 *                       (56/64 with LL64, 28/32 without).
 */
#if defined(CONFIG_ARC_HAS_LL64)
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
# define PREFETCH_READ(RX)	prefetch	[RX, 56]
# define PREFETCH_WRITE(RX)	prefetchw	[RX, 64]
#else	/* 32-bit loads/stores only */
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
# define PREFETCH_READ(RX)	prefetch	[RX, 28]
# define PREFETCH_WRITE(RX)	prefetchw	[RX, 32]
#endif	/* CONFIG_ARC_HAS_LL64 */
42 | ||
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *
 * In:   r0 = dst, r1 = src, r2 = n
 * Out:  r0 = dst (r0 is never written; r3 is the running destination)
 * Uses: r3-r10, lp_count and the status flags.  With CONFIG_ARC_HAS_LL64,
 *       LOADX/STOREX expand to ldd/std, which presumably also use the odd
 *       partner registers (r5, r7, r9, r11) - confirm against the ISA manual.
 *
 * Strategy:
 *   1. n == 0        -> return at once.
 *   2. n <= 8        -> plain byte loop.
 *   3. otherwise     -> byte-copy until dst is 32-bit aligned, then
 *                       dispatch on (src & 3):
 *        0      : bulk loop of four LOADX/STOREX pairs, then a byte tail;
 *        1,2,3  : load aligned source words and stitch them to the bytes
 *                 carried over in r5 (shift + or), two words per loop
 *                 iteration, then flush the carry and copy a byte tail.
 *
 * No PREFETCH/PREFETCHW hints are issued.  A write-prefetch at a fixed
 * offset ahead of the destination pointer can land past the end of the
 * buffer and take ownership of (and dirty) a cache line that is not being
 * copied to, which is unsafe when that memory is used for DMA.
 *
 * Reminder: an instruction following a ".d" branch/jump sits in its delay
 * slot and executes whether or not the branch is taken.
 */
ENTRY(memcpy)
	mov.f	0, r2			; set Z from n, no register written
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0			; delay slot: r3 = dst, keeps r0 as ret val

;;; if size <= 8
	cmp	r2, 8
	bls.d	@smallchunk
	mov.f	lp_count, r2		; delay slot: byte count for smallchunk

;;; Byte-copy until the destination is 32bit aligned
	and.f	r4, r0, 0x03		; Z set if dst is already aligned
	rsub	lp_count, r4, 4		; lp_count = 4 - (dst & 3)
	lpnz	@aligndestination	; loop skipped when Z is set
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
aligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03
	bnz.d	@sourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; lp_count = number of bulk iterations (four LOADX/STOREX pairs each)
	;; This lsr.f is the delay slot of the bnz.d above, so it also runs
	;; on the unaligned path, which recomputes both flags and lp_count.
	lsr.f	lp_count, r2, ZOLSHFT
	lpnz	@copy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
copy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; tail: n & ZOLAND bytes (at most 31 or 15)
smallchunk:
	lpnz	@copyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
copyremainingbytes:

	j	[blink]
;;; END CASE 0

sourceunaligned:
	cmp	r4, 2
	beq.d	@unalignedOffby2
	sub	r2, r2, 1		; delay slot: always executed, flags kept

	bhi.d	@unalignedOffby3
	ldb.ab	r5, [r1, 1]		; delay slot: first carried byte

;;; CASE 1: The source is unaligned, off by 1
	;; Hence I need to read 1 byte for a 16bit alignment
	;; and 2bytes to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6		; r5 = the three carried source bytes

	;; Both src and dst are aligned
	lpnz	@copy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
copy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	; tail: at most 7 bytes
	lpnz	@copybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
copybytewise_1:
	j	[blink]

unalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]		; r5 = the two carried source bytes
	sub	r2, r2, 1

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	asl.nz	r5, r5, 16		; only when the word loop will run
#endif
	lpnz	@copy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
copy8bytes_2:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 16		; flags still come from the lsr.f above
#endif
	sth.ab	r5, [r3, 2]		; flush the carried halfword

	and.f	lp_count, r2, 0x07	; tail: at most 7 bytes
	lpnz	@copybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
copybytewise_2:
	j	[blink]

unalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; One byte (already loaded into r5 in the bhi.d delay slot) is enough
;;; to reach 32bit alignment

	;; Both src and dst are aligned
	;; Convert to words, unfold x2
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	asl.ne	r5, r5, 24		; only when the word loop will run
#endif
	lpnz	@copy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	ld.ab	r8, [r1,4]

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
copy8bytes_3:

#ifdef __BIG_ENDIAN__
	lsr.nz	r5, r5, 24		; flags still come from the lsr.f above
#endif
	stb.ab	r5, [r3, 1]		; flush the carried byte

	and.f	lp_count, r2, 0x07	; tail: at most 7 bytes
	lpnz	@copybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
copybytewise_3:
	j	[blink]

END(memcpy)