Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 LT |
2 | /* |
3 | * arch/x86_64/lib/csum-partial.c | |
4 | * | |
5 | * This file contains network checksum routines that are better done | |
6 | * in an architecture-specific manner due to speed. | |
7 | */ | |
688eb819 | 8 | |
1da177e4 | 9 | #include <linux/compiler.h> |
e683014c | 10 | #include <linux/export.h> |
1da177e4 | 11 | #include <asm/checksum.h> |
34115065 | 12 | #include <asm/word-at-a-time.h> |
1da177e4 | 13 | |
5d4acb62 | 14 | static inline __wsum csum_finalize_sum(u64 temp64) |
1da177e4 | 15 | { |
5d4acb62 | 16 | return (__force __wsum)((temp64 + ror64(temp64, 32)) >> 32); |
688eb819 NG |
17 | } |
18 | ||
a476aae3 LT |
/*
 * Add five consecutive 64-bit words (40 bytes) from @m into @sum.
 *
 * The carry flag is chained through every addition and the final
 * "adcq $0" adds the last carry back into the result, so the returned
 * value is the end-around-carry sum of @sum and m[0]..m[4].
 */
static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5])
{
	asm("addq %[w0],%[acc]\n\t"
	    "adcq %[w1],%[acc]\n\t"
	    "adcq %[w2],%[acc]\n\t"
	    "adcq %[w3],%[acc]\n\t"
	    "adcq %[w4],%[acc]\n\t"
	    "adcq $0,%[acc]"
	    : [acc] "+r" (sum)
	    : [w0] "m" (m[0]), [w1] "m" (m[1]), [w2] "m" (m[2]),
	      [w3] "m" (m[3]), [w4] "m" (m[4]));
	return sum;
}
32 | ||
1da177e4 | 33 | /* |
34115065 | 34 | * Do a checksum on an arbitrary memory area. |
1da177e4 LT |
35 | * Returns a 32bit checksum. |
36 | * | |
37 | * This isn't as time critical as it used to be because many NICs | |
38 | * do hardware checksumming these days. | |
34115065 ED |
39 | * |
40 | * Still, with CHECKSUM_COMPLETE this is called to compute | |
41 | * checksums on IPv6 headers (40 bytes) and other small parts. | |
42 | * it's best to have buff aligned on a 64-bit boundary | |
1da177e4 | 43 | */ |
34115065 | 44 | __wsum csum_partial(const void *buff, int len, __wsum sum) |
1da177e4 | 45 | { |
34115065 | 46 | u64 temp64 = (__force u64)sum; |
1da177e4 | 47 | |
a476aae3 LT |
48 | /* Do two 40-byte chunks in parallel to get better ILP */ |
49 | if (likely(len >= 80)) { | |
50 | u64 temp64_2 = 0; | |
51 | do { | |
52 | temp64 = update_csum_40b(temp64, buff); | |
53 | temp64_2 = update_csum_40b(temp64_2, buff + 40); | |
54 | buff += 80; | |
55 | len -= 80; | |
56 | } while (len >= 80); | |
57 | ||
58 | asm("addq %1,%0\n\t" | |
59 | "adcq $0,%0" | |
60 | :"+r" (temp64): "r" (temp64_2)); | |
688eb819 | 61 | } |
688eb819 | 62 | |
a476aae3 LT |
63 | /* |
64 | * len == 40 is the hot case due to IPv6 headers, so return | |
65 | * early for that exact case without checking the tail bytes. | |
66 | */ | |
67 | if (len >= 40) { | |
68 | temp64 = update_csum_40b(temp64, buff); | |
69 | len -= 40; | |
70 | if (!len) | |
71 | return csum_finalize_sum(temp64); | |
72 | buff += 40; | |
34115065 ED |
73 | } |
74 | ||
75 | if (len & 32) { | |
76 | asm("addq 0*8(%[src]),%[res]\n\t" | |
77 | "adcq 1*8(%[src]),%[res]\n\t" | |
78 | "adcq 2*8(%[src]),%[res]\n\t" | |
79 | "adcq 3*8(%[src]),%[res]\n\t" | |
80 | "adcq $0,%[res]" | |
688eb819 NG |
81 | : [res] "+r"(temp64) |
82 | : [src] "r"(buff), "m"(*(const char(*)[32])buff)); | |
34115065 ED |
83 | buff += 32; |
84 | } | |
85 | if (len & 16) { | |
86 | asm("addq 0*8(%[src]),%[res]\n\t" | |
87 | "adcq 1*8(%[src]),%[res]\n\t" | |
88 | "adcq $0,%[res]" | |
688eb819 NG |
89 | : [res] "+r"(temp64) |
90 | : [src] "r"(buff), "m"(*(const char(*)[16])buff)); | |
34115065 ED |
91 | buff += 16; |
92 | } | |
93 | if (len & 8) { | |
94 | asm("addq 0*8(%[src]),%[res]\n\t" | |
95 | "adcq $0,%[res]" | |
688eb819 NG |
96 | : [res] "+r"(temp64) |
97 | : [src] "r"(buff), "m"(*(const char(*)[8])buff)); | |
34115065 ED |
98 | buff += 8; |
99 | } | |
100 | if (len & 7) { | |
688eb819 | 101 | unsigned int shift = (-len << 3) & 63; |
34115065 | 102 | unsigned long trail; |
1da177e4 | 103 | |
34115065 | 104 | trail = (load_unaligned_zeropad(buff) << shift) >> shift; |
1da177e4 | 105 | |
34115065 ED |
106 | asm("addq %[trail],%[res]\n\t" |
107 | "adcq $0,%[res]" | |
688eb819 NG |
108 | : [res] "+r"(temp64) |
109 | : [trail] "r"(trail)); | |
1da177e4 | 110 | } |
5d4acb62 | 111 | return csum_finalize_sum(temp64); |
1da177e4 | 112 | } |
784d5699 | 113 | EXPORT_SYMBOL(csum_partial); |
1da177e4 | 114 | |
1da177e4 LT |
115 | /* |
116 | * this routine is used for miscellaneous IP-like checksums, mainly | |
117 | * in icmp.c | |
118 | */ | |
a4f89fb7 | 119 | __sum16 ip_compute_csum(const void *buff, int len) |
1da177e4 | 120 | { |
688eb819 | 121 | return csum_fold(csum_partial(buff, len, 0)); |
1da177e4 | 122 | } |
2ee60e17 | 123 | EXPORT_SYMBOL(ip_compute_csum); |