# NOTE(review): extraction residue (git-blame table header) removed;
# the whole file was last blamed to commit b920de1b (DH).
/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign	L1_CACHE_BYTES		# start the routine on a cache-line boundary

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, size_t len)
#
###############################################################################
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	# Register roles:
	#   a0 = buff (current read pointer)
	#   d2 = len  (bytes remaining)
	#   d1 = 32-bit ones-complement accumulator
	#   d0/e0/e1/e3 = scratch for loaded words
	# movm/ret below save and restore d2,d3 as a register list
	# (NOTE(review): presumably callee-saved under the MN10300 ABI — confirm)
	movm	[d2,d3],(sp)
	mov	d0,(12,sp)		# spill buff; reloaded at the end to test odd alignment
	mov	d1,(16,sp)		# spill len (not reloaded in this routine)
	mov	d1,d2			# count
	mov	d0,a0			# buff
	clr	d1			# accumulator

	cmp	+0,d2
	beq	do_csum_done		# return if zero-length buffer

	# 4-byte align the buffer pointer
	btst	+3,a0
	beq	do_csum_now_4b_aligned

	btst	+1,a0
	beq	do_csum_addr_not_odd
	movbu	(a0),d0			# consume one byte to make the pointer even
	inc	a0
	asl	+8,d0			# odd leading byte sits in the high half of its
					# 16-bit lane; the final swaph compensates
	add	d0,d1
	addc	+0,d1			# fold carry back in (ones-complement add)
	add	-1,d2
do_csum_addr_not_odd:

	cmp	+2,d2
	bcs	do_csum_fewer_than_4	# fewer than 2 bytes left: finish as tail
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0		# consume one halfword to reach 4-byte alignment
	add	d0,d1
	addc	+0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder	# 4-byte aligned remainder

	add	-32,d2			# bias count so the loop's sub sets carry on exit
	mov	+32,d3			# chunk size

do_csum_loop:
	# eight 32-bit loads per iteration; the addc chain keeps folding the
	# carry so the order of these instructions must not change
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	mov	(a0+),d0
	addc	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1			# absorb the final carry of the chain

	sub	d3,d2
	bcc	do_csum_loop		# keep looping while >= 32 bytes remained

	add	d3,d2			# undo the -32 bias: d2 = true bytes left (0..31)
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# checksum the remaining whole words (0-3 of them), falling through
	# the ladder so each case sums exactly the right number of words
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	# 0-3 trailing bytes: a possible halfword then a possible final byte
	and	+3,d2
	beq	do_csum_done
	# NOTE(review): xor_cmp appears to zero d0 and compare d2 with +2 in
	# one combined MN10300 operation — d0 must be 0 here in case no
	# halfword is loaded; confirm against the ISA manual
	xor_cmp	d0,d0,+2,d2
	bcs	do_csum_fewer_than_2
	movhu	(a0+),d0
do_csum_fewer_than_2:
	and	+1,d2
	beq	do_csum_add_last_bit
	movbu	(a0),d3			# final odd byte goes in the low lane
	add	d3,d0
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits:
	# d0 = (sum >> 16) + (sum << 16), then the carry is folded by the
	# addc and the result taken from the top half
	mov	+0xffff0000,d2
	and	d1,d2			# d2 = high halfword of sum, in place
	asl	+16,d1			# d1 = low halfword of sum, shifted up
	add	d2,d1,d0
	addc	+0xffff,d0		# fold final carry; +0xffff keeps the top
					# half correct through the lsr below
	lsr	+16,d0

	# flip the halves of the word result if the buffer was oddly aligned
	mov	(12,sp),d1		# reload original buff pointer
	and	+1,d1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0			# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8		# restore d2,d3 and pop 8 bytes of frame

do_csum_end:
	.size	do_csum, do_csum_end-do_csum