Commit | Line | Data |
---|---|---|
eea9507a LFT |
1 | /* Extracted from GLIBC memcpy.c and memcopy.h, which is: |
2 | Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | Contributed by Torbjorn Granlund (tege@sics.se). | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library; if not, see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <linux/types.h> | |
21 | ||
/*
 * Word type used by the aligned copy loops.  It should normally be the
 * widest type that a single load or store instruction can move.
 */
#define op_t	unsigned long
#define OPSIZ	(sizeof(op_t))

/* Type best suited to holding a single byte in a register. */
#define reg_char	char
30 | ||
/*
 * MERGE(w0, sh_1, w1, sh_2) -- combine two words into one by shifting each
 * and OR-ing the results.  It is used when the source of a word copy is not
 * word aligned: W0 is the aligned word read at the lower address, W1 the
 * aligned word read right after it, SH_1 the source misalignment in bits
 * and SH_2 the word width in bits minus SH_1.
 *
 * Which way each word has to be shifted depends on the byte order: on a
 * little-endian machine the bytes at higher addresses are the more
 * significant ones, on a big-endian machine it is the other way round.
 * The byte order is taken from the compiler's predefined __BYTE_ORDER__;
 * if the compiler does not provide it, little-endian is assumed.
 *
 * Both shift counts must be smaller than the width of the word type,
 * otherwise the shift is undefined.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
	(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
#else
#define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
#endif
32 | ||
/*
 * BYTE_COPY_FWD(dst_bp, src_bp, nbytes) -- copy exactly NBYTES bytes, one
 * byte at a time, from the address held in SRC_BP to the address held in
 * DST_BP.  Nothing is assumed about the alignment of either address.
 *
 * DST_BP and SRC_BP must be modifiable integer lvalues holding addresses;
 * both are advanced by NBYTES.  NBYTES is evaluated once, before the first
 * byte is copied.
 */
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
do { \
	size_t __bytes_left; \
	\
	for (__bytes_left = (nbytes); __bytes_left != 0; __bytes_left--) { \
		const unsigned char __byte = *(unsigned char *)(src_bp); \
		\
		(src_bp) += 1; \
		*(unsigned char *)(dst_bp) = __byte; \
		(dst_bp) += 1; \
	} \
} while (0)
46 | ||
/*
 * WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) -- copy *up to* NBYTES
 * bytes from SRC_BP to DST_BP in whole `op_t' words, on the assumption that
 * DST_BP is already aligned on an OPSIZ multiple.
 *
 * NBYTES / OPSIZ words are copied: by _wordcopy_fwd_aligned() when SRC_BP is
 * an OPSIZ multiple as well, by _wordcopy_fwd_dest_aligned() otherwise.
 * DST_BP and SRC_BP are then advanced past the copied words, and the number
 * of bytes that did not fill a whole word (NBYTES % OPSIZ) is stored in
 * NBYTES_LEFT.
 *
 * NBYTES is evaluated several times, so it must be free of side effects.
 * NBYTES_LEFT is written last and may therefore be the same lvalue as NBYTES.
 */
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
do { \
	if ((src_bp) % OPSIZ != 0) \
		_wordcopy_fwd_dest_aligned((dst_bp), (src_bp), \
					   (nbytes) / OPSIZ); \
	else \
		_wordcopy_fwd_aligned((dst_bp), (src_bp), \
				      (nbytes) / OPSIZ); \
	(src_bp) += (nbytes) & ~(OPSIZ - 1); \
	(dst_bp) += (nbytes) & ~(OPSIZ - 1); \
	(nbytes_left) = (nbytes) % OPSIZ; \
} while (0)
63 | ||
64 | ||
/*
 * Smallest byte count for which the word-at-a-time (unrolled) copy loops
 * are used; shorter copies are done purely byte by byte.
 */
#define OP_T_THRES	16
67 | ||
68 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to | |
69 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | |
70 | Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ | |
71 | /* stream-lined (read x8 + write x8) */ | |
72 | static void _wordcopy_fwd_aligned(long int dstp, long int srcp, size_t len) | |
73 | { | |
74 | while (len > 7) { | |
75 | register op_t a0, a1, a2, a3, a4, a5, a6, a7; | |
76 | ||
77 | a0 = ((op_t *) srcp)[0]; | |
78 | a1 = ((op_t *) srcp)[1]; | |
79 | a2 = ((op_t *) srcp)[2]; | |
80 | a3 = ((op_t *) srcp)[3]; | |
81 | a4 = ((op_t *) srcp)[4]; | |
82 | a5 = ((op_t *) srcp)[5]; | |
83 | a6 = ((op_t *) srcp)[6]; | |
84 | a7 = ((op_t *) srcp)[7]; | |
85 | ((op_t *) dstp)[0] = a0; | |
86 | ((op_t *) dstp)[1] = a1; | |
87 | ((op_t *) dstp)[2] = a2; | |
88 | ((op_t *) dstp)[3] = a3; | |
89 | ((op_t *) dstp)[4] = a4; | |
90 | ((op_t *) dstp)[5] = a5; | |
91 | ((op_t *) dstp)[6] = a6; | |
92 | ((op_t *) dstp)[7] = a7; | |
93 | ||
94 | srcp += 8 * OPSIZ; | |
95 | dstp += 8 * OPSIZ; | |
96 | len -= 8; | |
97 | } | |
98 | while (len > 0) { | |
99 | *(op_t *)dstp = *(op_t *)srcp; | |
100 | ||
101 | srcp += OPSIZ; | |
102 | dstp += OPSIZ; | |
103 | len -= 1; | |
104 | } | |
105 | } | |
106 | ||
107 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to | |
108 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). | |
109 | DSTP should be aligned for memory operations on `op_t's, but SRCP must | |
110 | *not* be aligned. */ | |
111 | /* stream-lined (read x4 + write x4) */ | |
112 | static void _wordcopy_fwd_dest_aligned(long int dstp, long int srcp, | |
113 | size_t len) | |
114 | { | |
115 | op_t ap; | |
116 | int sh_1, sh_2; | |
117 | ||
118 | /* Calculate how to shift a word read at the memory operation | |
119 | aligned srcp to make it aligned for copy. */ | |
120 | ||
121 | sh_1 = 8 * (srcp % OPSIZ); | |
122 | sh_2 = 8 * OPSIZ - sh_1; | |
123 | ||
124 | /* Make SRCP aligned by rounding it down to the beginning of the `op_t' | |
125 | it points in the middle of. */ | |
126 | srcp &= -OPSIZ; | |
127 | ap = ((op_t *) srcp)[0]; | |
128 | srcp += OPSIZ; | |
129 | ||
130 | while (len > 3) { | |
131 | op_t a0, a1, a2, a3; | |
132 | ||
133 | a0 = ((op_t *) srcp)[0]; | |
134 | a1 = ((op_t *) srcp)[1]; | |
135 | a2 = ((op_t *) srcp)[2]; | |
136 | a3 = ((op_t *) srcp)[3]; | |
137 | ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2); | |
138 | ((op_t *) dstp)[1] = MERGE(a0, sh_1, a1, sh_2); | |
139 | ((op_t *) dstp)[2] = MERGE(a1, sh_1, a2, sh_2); | |
140 | ((op_t *) dstp)[3] = MERGE(a2, sh_1, a3, sh_2); | |
141 | ||
142 | ap = a3; | |
143 | srcp += 4 * OPSIZ; | |
144 | dstp += 4 * OPSIZ; | |
145 | len -= 4; | |
146 | } | |
147 | while (len > 0) { | |
148 | register op_t a0; | |
149 | ||
150 | a0 = ((op_t *) srcp)[0]; | |
151 | ((op_t *) dstp)[0] = MERGE(ap, sh_1, a0, sh_2); | |
152 | ||
153 | ap = a0; | |
154 | srcp += OPSIZ; | |
155 | dstp += OPSIZ; | |
156 | len -= 1; | |
157 | } | |
158 | } | |
159 | ||
160 | void *memcpy(void *dstpp, const void *srcpp, size_t len) | |
161 | { | |
162 | unsigned long int dstp = (long int) dstpp; | |
163 | unsigned long int srcp = (long int) srcpp; | |
164 | ||
165 | /* Copy from the beginning to the end. */ | |
166 | ||
167 | /* If there not too few bytes to copy, use word copy. */ | |
168 | if (len >= OP_T_THRES) { | |
169 | /* Copy just a few bytes to make DSTP aligned. */ | |
170 | len -= (-dstp) % OPSIZ; | |
171 | BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ); | |
172 | ||
173 | /* Copy whole pages from SRCP to DSTP by virtual address | |
174 | manipulation, as much as possible. */ | |
175 | ||
176 | /* PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len); */ | |
177 | ||
178 | /* Copy from SRCP to DSTP taking advantage of the known | |
179 | alignment of DSTP. Number of bytes remaining is put in the | |
180 | third argument, i.e. in LEN. This number may vary from | |
181 | machine to machine. */ | |
182 | ||
183 | WORD_COPY_FWD(dstp, srcp, len, len); | |
184 | ||
185 | /* Fall out and copy the tail. */ | |
186 | } | |
187 | ||
188 | /* There are just a few bytes to copy. Use byte memory operations. */ | |
189 | BYTE_COPY_FWD(dstp, srcp, len); | |
190 | ||
191 | return dstpp; | |
192 | } | |
193 | ||
/*
 * memcpyb -- copy LEN bytes from SRCPP to DSTPP using nothing but the
 * byte-by-byte BYTE_COPY_FWD() loop, whatever the length or alignment.
 *
 * Returns DSTPP.
 */
void *memcpyb(void *dstpp, const void *srcpp, unsigned len)
{
	unsigned long int to = (unsigned long int)dstpp;
	unsigned long int from = (unsigned long int)srcpp;

	BYTE_COPY_FWD(to, from, len);

	return dstpp;
}