Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
6a2d7a95 ED |
2 | #ifndef _LINUX_RECIPROCAL_DIV_H |
3 | #define _LINUX_RECIPROCAL_DIV_H | |
4 | ||
5 | #include <linux/types.h> | |
6 | ||
7 | /* | |
809fa972 HFS |
8 | * This algorithm is based on the paper "Division by Invariant |
9 | * Integers Using Multiplication" by Torbjörn Granlund and Peter | |
10 | * L. Montgomery. | |
6a2d7a95 | 11 | * |
809fa972 HFS |
12 | * The assembler implementation from Agner Fog, which this code is |
13 | * based on, can be found here: | |
14 | * http://www.agner.org/optimize/asmlib.zip | |
6a2d7a95 | 15 | * |
809fa972 HFS |
16 | * This optimization for A/B is helpful if the divisor B is mostly |
17 | * runtime invariant. The reciprocal of B is calculated in the | |
18 | * slow-path with reciprocal_value(). The fast-path can then just use | |
19 | * a much faster multiplication operation with a variable dividend A | |
20 | * to calculate the division A/B. | |
6a2d7a95 ED |
21 | */ |
22 | ||
809fa972 HFS |
23 | struct reciprocal_value { /* precomputed reciprocal of a u32 divisor, consumed by reciprocal_divide() */ |
24 | u32 m; /* multiplier: reciprocal_divide() keeps the high 32 bits of (u64)a * m */ | |
25 | u8 sh1, sh2; /* right-shift counts: sh1 is applied to (a - t), sh2 to the final sum */ | |
26 | }; | |
6a2d7a95 | 27 | |
06ae4826 JW |
28 | /* "reciprocal_value" and "reciprocal_divide" together implement the basic |
29 | * version of the algorithm described in Figure 4.1 of the paper. | |
30 | */ | |
809fa972 | 31 | struct reciprocal_value reciprocal_value(u32 d); |
6a2d7a95 | 32 | |
809fa972 | 33 | static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) |
6a2d7a95 | 34 | { /* Fast path: computes a divided by the divisor R was built from, using one multiply and two shifts. */ |
809fa972 HFS |
35 | u32 t = (u32)(((u64)a * R.m) >> 32); /* t = high 32 bits of the 64-bit product a * R.m */ |
36 | return (t + ((a - t) >> R.sh1)) >> R.sh2; /* t <= a because R.m < 2^32, so (a - t) cannot wrap and the sum never exceeds a; assumes sh1, sh2 < 32 as produced by reciprocal_value() - TODO confirm */ | |
6a2d7a95 | 37 | } |
809fa972 | 38 | |
06ae4826 JW |
39 | struct reciprocal_value_adv { /* result of reciprocal_value_adv(); see the usage pseudo code in the comment that follows */ |
40 | u32 m; /* multiplier: the pseudo code keeps the high 32 bits of n * m */ | |
41 | u8 sh, exp; /* sh: post-multiply right-shift count; exp: shift used when the divisor equals 1U << exp */ | |
42 | bool is_wide_m; /* selects the subtract/halve/add fixup sequence instead of plain multiply-and-shift (presumably the true multiplier does not fit in 32 bits - confirm against reciprocal_value_adv()) */ | |
43 | }; | |
44 | ||
45 | /* "reciprocal_value_adv" implements the advanced version of the algorithm | |
46 | * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose | |
47 | * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The | |
48 | * exception case could be easily handled before calling "reciprocal_value_adv". | |
49 | * | |
50 | * The advanced version requires more complex calculation to get the reciprocal | |
51 | * multiplier and other control variables, but then could reduce the required | |
52 | * emulation operations. | |
53 | * | |
54 | * It makes no sense to use this advanced version for host divide emulation: | |
55 | * the extra complexity of calculating the multiplier etc. could completely | |
56 | * negate our saving on emulation operations. | |
57 | * | |
58 | * However, it makes sense to use it for JIT divide code generation, for which | |
59 | * we are willing to trade host performance for that of the JITed code. As shown | |
60 | * by the following pseudo code, the required emulation operations could go down | |
61 | * from 6 (the basic version) to 3 or 4. | |
62 | * | |
63 | * To use the result of "reciprocal_value_adv", suppose we want to calculate | |
64 | * n/d, the pseudo C code will be: | |
65 | * | |
66 | * struct reciprocal_value_adv rvalue; | |
67 | * u8 pre_shift, exp; | |
68 | * | |
69 | * // handle exception case. | |
70 | * if (d >= (1U << 31)) { | |
71 | * result = n >= d; | |
72 | * return; | |
73 | * } | |
74 | * | |
75 | * rvalue = reciprocal_value_adv(d, 32); | |
76 | * exp = rvalue.exp; | |
77 | * if (rvalue.is_wide_m && !(d & 1)) { | |
78 | * // floor(log2(d & (2^32 -d))) | |
79 | * pre_shift = fls(d & -d) - 1; | |
80 | * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); | |
81 | * } else { | |
82 | * pre_shift = 0; | |
83 | * } | |
84 | * | |
85 | * // code generation starts. | |
86 | * if (d == 1U << exp) { | |
87 | * result = n >> exp; | |
88 | * } else if (rvalue.is_wide_m) { | |
89 | * // pre_shift must be zero when reached here. | |
90 | * t = ((u64)n * rvalue.m) >> 32; | |
91 | * result = n - t; | |
92 | * result >>= 1; | |
93 | * result += t; | |
94 | * result >>= rvalue.sh - 1; | |
95 | * } else { | |
96 | * if (pre_shift) | |
97 | * result = n >> pre_shift; | |
98 | * result = ((u64)result * rvalue.m) >> 32; | |
99 | * result >>= rvalue.sh; | |
100 | * } | |
101 | */ | |
102 | struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); | |
103 | ||
809fa972 | 104 | #endif /* _LINUX_RECIPROCAL_DIV_H */ |