// SPDX-License-Identifier: GPL-2.0
/*
 * arch/x86_64/lib/csum-partial.c
 *
 * This file contains network checksum routines that are better done
 * in an architecture-specific manner due to speed.
 */

#include <linux/compiler.h>
#include <linux/export.h>
#include <asm/checksum.h>
#include <asm/word-at-a-time.h>

688eb819 | 14 | static inline unsigned short from32to16(unsigned a) |
1da177e4 | 15 | { |
688eb819 | 16 | unsigned short b = a >> 16; |
1da177e4 | 17 | asm("addw %w2,%w0\n\t" |
688eb819 | 18 | "adcw $0,%w0\n" |
1da177e4 LT |
19 | : "=r" (b) |
20 | : "0" (b), "r" (a)); | |
21 | return b; | |
22 | } | |
23 | ||
/*
 * Finish a partial checksum: fold the 64-bit accumulator down to 32 bits
 * and, if the buffer started on an odd address, swap the two bytes of the
 * 16-bit fold so the result matches a byte-aligned sum.
 *
 * NOTE(review): the incoming 'result' argument is unconditionally
 * overwritten before it is ever read, so its value is irrelevant —
 * the parameter is dead and could be made a local instead.
 */
static inline __wsum csum_tail(unsigned int result, u64 temp64, int odd)
{
	result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
	if (unlikely(odd)) {
		result = from32to16(result);
		/* undo the byte shift introduced by the odd-address prologue */
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
	}
	return (__force __wsum)result;
}
33 | ||
1da177e4 | 34 | /* |
34115065 | 35 | * Do a checksum on an arbitrary memory area. |
1da177e4 LT |
36 | * Returns a 32bit checksum. |
37 | * | |
38 | * This isn't as time critical as it used to be because many NICs | |
39 | * do hardware checksumming these days. | |
34115065 ED |
40 | * |
41 | * Still, with CHECKSUM_COMPLETE this is called to compute | |
42 | * checksums on IPv6 headers (40 bytes) and other small parts. | |
43 | * it's best to have buff aligned on a 64-bit boundary | |
1da177e4 | 44 | */ |
34115065 | 45 | __wsum csum_partial(const void *buff, int len, __wsum sum) |
1da177e4 | 46 | { |
34115065 ED |
47 | u64 temp64 = (__force u64)sum; |
48 | unsigned odd, result; | |
1da177e4 | 49 | |
1da177e4 LT |
50 | odd = 1 & (unsigned long) buff; |
51 | if (unlikely(odd)) { | |
34115065 ED |
52 | if (unlikely(len == 0)) |
53 | return sum; | |
54 | temp64 = ror32((__force u32)sum, 8); | |
55 | temp64 += (*(unsigned char *)buff << 8); | |
1da177e4 LT |
56 | len--; |
57 | buff++; | |
58 | } | |
1da177e4 | 59 | |
688eb819 NG |
60 | /* |
61 | * len == 40 is the hot case due to IPv6 headers, but annotating it likely() | |
62 | * has noticeable negative affect on codegen for all other cases with | |
63 | * minimal performance benefit here. | |
64 | */ | |
65 | if (len == 40) { | |
34115065 ED |
66 | asm("addq 0*8(%[src]),%[res]\n\t" |
67 | "adcq 1*8(%[src]),%[res]\n\t" | |
68 | "adcq 2*8(%[src]),%[res]\n\t" | |
69 | "adcq 3*8(%[src]),%[res]\n\t" | |
70 | "adcq 4*8(%[src]),%[res]\n\t" | |
34115065 | 71 | "adcq $0,%[res]" |
688eb819 NG |
72 | : [res] "+r"(temp64) |
73 | : [src] "r"(buff), "m"(*(const char(*)[40])buff)); | |
74 | return csum_tail(result, temp64, odd); | |
75 | } | |
76 | if (unlikely(len >= 64)) { | |
77 | /* | |
78 | * Extra accumulators for better ILP in the loop. | |
79 | */ | |
80 | u64 tmp_accum, tmp_carries; | |
81 | ||
82 | asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t" | |
83 | "xorl %k[tmp_carries],%k[tmp_carries]\n\t" | |
84 | "subl $64, %[len]\n\t" | |
85 | "1:\n\t" | |
86 | "addq 0*8(%[src]),%[res]\n\t" | |
87 | "adcq 1*8(%[src]),%[res]\n\t" | |
88 | "adcq 2*8(%[src]),%[res]\n\t" | |
89 | "adcq 3*8(%[src]),%[res]\n\t" | |
90 | "adcl $0,%k[tmp_carries]\n\t" | |
91 | "addq 4*8(%[src]),%[tmp_accum]\n\t" | |
92 | "adcq 5*8(%[src]),%[tmp_accum]\n\t" | |
93 | "adcq 6*8(%[src]),%[tmp_accum]\n\t" | |
94 | "adcq 7*8(%[src]),%[tmp_accum]\n\t" | |
95 | "adcl $0,%k[tmp_carries]\n\t" | |
96 | "addq $64, %[src]\n\t" | |
97 | "subl $64, %[len]\n\t" | |
98 | "jge 1b\n\t" | |
99 | "addq %[tmp_accum],%[res]\n\t" | |
100 | "adcq %[tmp_carries],%[res]\n\t" | |
101 | "adcq $0,%[res]" | |
102 | : [tmp_accum] "=&r"(tmp_accum), | |
103 | [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64), | |
104 | [len] "+r"(len), [src] "+r"(buff) | |
105 | : "m"(*(const char *)buff)); | |
34115065 ED |
106 | } |
107 | ||
108 | if (len & 32) { | |
109 | asm("addq 0*8(%[src]),%[res]\n\t" | |
110 | "adcq 1*8(%[src]),%[res]\n\t" | |
111 | "adcq 2*8(%[src]),%[res]\n\t" | |
112 | "adcq 3*8(%[src]),%[res]\n\t" | |
113 | "adcq $0,%[res]" | |
688eb819 NG |
114 | : [res] "+r"(temp64) |
115 | : [src] "r"(buff), "m"(*(const char(*)[32])buff)); | |
34115065 ED |
116 | buff += 32; |
117 | } | |
118 | if (len & 16) { | |
119 | asm("addq 0*8(%[src]),%[res]\n\t" | |
120 | "adcq 1*8(%[src]),%[res]\n\t" | |
121 | "adcq $0,%[res]" | |
688eb819 NG |
122 | : [res] "+r"(temp64) |
123 | : [src] "r"(buff), "m"(*(const char(*)[16])buff)); | |
34115065 ED |
124 | buff += 16; |
125 | } | |
126 | if (len & 8) { | |
127 | asm("addq 0*8(%[src]),%[res]\n\t" | |
128 | "adcq $0,%[res]" | |
688eb819 NG |
129 | : [res] "+r"(temp64) |
130 | : [src] "r"(buff), "m"(*(const char(*)[8])buff)); | |
34115065 ED |
131 | buff += 8; |
132 | } | |
133 | if (len & 7) { | |
688eb819 | 134 | unsigned int shift = (-len << 3) & 63; |
34115065 | 135 | unsigned long trail; |
1da177e4 | 136 | |
34115065 | 137 | trail = (load_unaligned_zeropad(buff) << shift) >> shift; |
1da177e4 | 138 | |
34115065 ED |
139 | asm("addq %[trail],%[res]\n\t" |
140 | "adcq $0,%[res]" | |
688eb819 NG |
141 | : [res] "+r"(temp64) |
142 | : [trail] "r"(trail)); | |
1da177e4 | 143 | } |
688eb819 | 144 | return csum_tail(result, temp64, odd); |
1da177e4 | 145 | } |
784d5699 | 146 | EXPORT_SYMBOL(csum_partial); |
1da177e4 | 147 | |
1da177e4 LT |
148 | /* |
149 | * this routine is used for miscellaneous IP-like checksums, mainly | |
150 | * in icmp.c | |
151 | */ | |
a4f89fb7 | 152 | __sum16 ip_compute_csum(const void *buff, int len) |
1da177e4 | 153 | { |
688eb819 | 154 | return csum_fold(csum_partial(buff, len, 0)); |
1da177e4 | 155 | } |
2ee60e17 | 156 | EXPORT_SYMBOL(ip_compute_csum); |