Commit | Line | Data |
---|---|---|
b920de1b DH |
1 | /* Optimised simple memory checksum |
2 | * | |
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | |
4 | * Written by David Howells (dhowells@redhat.com) | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public Licence | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the Licence, or (at your option) any later version. | |
10 | */ | |
11 | #include <asm/cache.h> | |
12 | ||
ab244c1a AT |
13 | .section .text |
14 | .balign L1_CACHE_BYTES | |
b920de1b DH |
15 | |
16 | ############################################################################### | |
17 | # Sums the buffer into a 32-bit accumulator with end-around carry, then folds the total to 16 bits. | |
ab244c1a | 18 | # unsigned int do_csum(const unsigned char *buff, int len)
b920de1b DH |
19 | # In: d0 = buff, d1 = len. Out: d0 = folded sum. d2/d3 are saved on entry and restored by the ret; d1, a0, a1, e0, e1 and e3 are overwritten. |
20 | ############################################################################### | |
21 | .globl do_csum | |
ab244c1a | 22 | .type do_csum,@function
b920de1b DH |
23 | do_csum: |
24 | movm [d2,d3],(sp) | |
b920de1b DH |
25 | mov d1,d2 # d2 = count of bytes still to be summed |
26 | mov d0,a0 # a0 = buff, used as the read cursor | |
ab244c1a | 27 | mov a0,a1 # a1 = original buff, tested for odd alignment after the fold
b920de1b DH |
28 | clr d1 # d1 = 32-bit accumulator |
29 | ||
30 | cmp +0,d2 | |
ab244c1a | 31 | ble do_csum_done # zero or negative length: nothing to sum, go straight to the fold
b920de1b DH |
32 | |
33 | # 4-byte align the buffer pointer by consuming a leading byte and/or halfword | |
34 | btst +3,a0 | |
35 | beq do_csum_now_4b_aligned | |
36 | ||
37 | btst +1,a0 | |
38 | beq do_csum_addr_not_odd | |
39 | movbu (a0),d0 | |
40 | inc a0 | |
41 | asl +8,d0 | |
42 | add d0,d1 | |
b920de1b | 43 | add -1,d2 # the odd leading byte has been added in, shifted up by 8 bits
b920de1b | 44 |
ab244c1a | 45 | do_csum_addr_not_odd: # a0 is even here; a halfword may still be needed to reach 4-byte alignment
b920de1b DH |
46 | cmp +2,d2 |
47 | bcs do_csum_fewer_than_4 | |
48 | btst +2,a0 | |
49 | beq do_csum_now_4b_aligned | |
50 | movhu (a0+),d0 | |
51 | add d0,d1 | |
b920de1b DH |
52 | add -2,d2 |
53 | cmp +4,d2 | |
54 | bcs do_csum_fewer_than_4 | |
55 | ||
56 | do_csum_now_4b_aligned: | |
57 | # checksum as much as we can in chunks of 32 bytes (eight words per pass) | |
58 | cmp +31,d2 | |
59 | bls do_csum_remainder # 31 bytes or fewer left: 4-byte aligned remainder | |
60 | ||
61 | add -32,d2 | |
62 | mov +32,d3 | |
63 | ||
64 | do_csum_loop: | |
65 | mov (a0+),d0 | |
b920de1b | 66 | mov (a0+),e0 # loads are issued in groups of four ahead of the adds
b920de1b | 67 | mov (a0+),e1
b920de1b | 68 | mov (a0+),e3
ab244c1a AT |
69 | add d0,d1 |
70 | addc e0,d1 | |
71 | addc e1,d1 | |
b920de1b DH |
72 | addc e3,d1 |
73 | mov (a0+),d0 | |
b920de1b | 74 | mov (a0+),e0
b920de1b | 75 | mov (a0+),e1
b920de1b | 76 | mov (a0+),e3 # the addc chain resumes from the earlier carry, so these loads are relied on to leave it intact
ab244c1a AT |
77 | addc d0,d1 |
78 | addc e0,d1 | |
79 | addc e1,d1 | |
b920de1b DH |
80 | addc e3,d1 |
81 | addc +0,d1 | |
82 | ||
83 | sub d3,d2 | |
84 | bcc do_csum_loop | |
85 | ||
86 | add d3,d2 | |
87 | beq do_csum_done | |
88 | ||
89 | do_csum_remainder: | |
90 | # cut 16-31 bytes down to 0-15 by summing one group of four words | |
91 | cmp +16,d2 | |
92 | bcs do_csum_fewer_than_16 | |
93 | mov (a0+),d0 | |
b920de1b | 94 | mov (a0+),e0
b920de1b | 95 | mov (a0+),e1
b920de1b | 96 | mov (a0+),e3
ab244c1a AT |
97 | add d0,d1 |
98 | addc e0,d1 | |
99 | addc e1,d1 | |
b920de1b DH |
100 | addc e3,d1 |
101 | addc +0,d1 | |
102 | add -16,d2 | |
103 | beq do_csum_done | |
104 | ||
105 | do_csum_fewer_than_16: | |
106 | # checksum the remaining whole words (at most three, since fewer than 16 bytes are left) | |
107 | cmp +4,d2 | |
108 | bcs do_csum_fewer_than_4 | |
109 | cmp +8,d2 | |
110 | bcs do_csum_one_word | |
111 | cmp +12,d2 | |
112 | bcs do_csum_two_words | |
113 | mov (a0+),d0 | |
114 | add d0,d1 | |
115 | addc +0,d1 | |
116 | do_csum_two_words: | |
117 | mov (a0+),d0 | |
118 | add d0,d1 | |
119 | addc +0,d1 | |
120 | do_csum_one_word: | |
121 | mov (a0+),d0 | |
122 | add d0,d1 | |
123 | addc +0,d1 | |
124 | ||
125 | do_csum_fewer_than_4: | |
126 | and +3,d2 | |
127 | beq do_csum_done | |
128 | xor_cmp d0,d0,+2,d2 | |
129 | bcs do_csum_fewer_than_2 | |
130 | movhu (a0+),d0 | |
b920de1b DH |
131 | and +1,d2 |
132 | beq do_csum_add_last_bit | |
ab244c1a | 133 | do_csum_fewer_than_2: # d0 is either zero (from xor_cmp) or the halfword just loaded
b920de1b DH |
134 | movbu (a0),d3 |
135 | add d3,d0 | |
136 | do_csum_add_last_bit: | |
137 | add d0,d1 | |
138 | addc +0,d1 | |
139 | ||
140 | do_csum_done: | |
141 | # compress the checksum down to 16 bits: high half + low half with end-around carry | |
ab244c1a AT |
142 | mov +0xffff0000,d0 |
143 | and d1,d0 | |
b920de1b | 144 | asl +16,d1 # low half of the sum moved up to bits 31:16
ab244c1a | 145 | add d1,d0 # bits 31:16 = high + low, bits 15:0 = 0; the addc of 0xffff below turns a carry out into +1 at bit 16
b920de1b DH |
146 | addc +0xffff,d0 |
147 | lsr +16,d0 | |
148 | ||
149 | # swap the two bytes of the 16-bit result if the buffer started at an odd address | |
ab244c1a | 150 | and +1,a1 # isolate bit 0 of the original buffer address
b920de1b DH |
151 | beq do_csum_not_oddly_aligned |
152 | swaph d0,d0 # exchange bits 15:8 with 7:0 | |
153 | ||
154 | do_csum_not_oddly_aligned: | |
155 | ret [d2,d3],8 | |
156 | ||
ab244c1a | 157 | .size do_csum, .-do_csum