nft_set_pipapo: Prepare for vectorised implementation: helpers
[linux-block.git] / net / netfilter / nft_set_pipapo.h
CommitLineData
8683f4b9
SB
1// SPDX-License-Identifier: GPL-2.0-only
2
3#ifndef _NFT_SET_PIPAPO_H
4
5#include <linux/log2.h>
6#include <net/ipv6.h> /* For the maximum length of a field */
7
8/* Count of concatenated fields depends on count of 32-bit nftables registers */
9#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT
10
11/* Largest supported field size */
12#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr))
13#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE)
14
15/* Bits to be grouped together in table buckets depending on set size */
16#define NFT_PIPAPO_GROUP_BITS_INIT NFT_PIPAPO_GROUP_BITS_SMALL_SET
17#define NFT_PIPAPO_GROUP_BITS_SMALL_SET 8
18#define NFT_PIPAPO_GROUP_BITS_LARGE_SET 4
19#define NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4 \
20 BUILD_BUG_ON((NFT_PIPAPO_GROUP_BITS_SMALL_SET != 8) || \
21 (NFT_PIPAPO_GROUP_BITS_LARGE_SET != 4))
22#define NFT_PIPAPO_GROUPS_PER_BYTE(f) (BITS_PER_BYTE / (f)->bb)
23
24/* If a lookup table gets bigger than NFT_PIPAPO_LT_SIZE_HIGH, switch to the
25 * small group width, and switch to the big group width if the table gets
26 * smaller than NFT_PIPAPO_LT_SIZE_LOW.
27 *
28 * Picking 2MiB as threshold (for a single table) avoids as much as possible
29 * crossing page boundaries on most architectures (x86-64 and MIPS huge pages,
30 * ARMv7 supersections, POWER "large" pages, SPARC Level 1 regions, etc.), which
31 * keeps performance nice in case kvmalloc() gives us non-contiguous areas.
32 */
33#define NFT_PIPAPO_LT_SIZE_THRESHOLD (1 << 21)
34#define NFT_PIPAPO_LT_SIZE_HYSTERESIS (1 << 16)
35#define NFT_PIPAPO_LT_SIZE_HIGH NFT_PIPAPO_LT_SIZE_THRESHOLD
36#define NFT_PIPAPO_LT_SIZE_LOW NFT_PIPAPO_LT_SIZE_THRESHOLD - \
37 NFT_PIPAPO_LT_SIZE_HYSTERESIS
38
39/* Fields are padded to 32 bits in input registers */
40#define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \
41 (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32)))
42#define NFT_PIPAPO_GROUPS_PADDING(f) \
43 (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \
44 NFT_PIPAPO_GROUPS_PER_BYTE(f))
45
46/* Number of buckets given by 2 ^ n, with n bucket bits */
47#define NFT_PIPAPO_BUCKETS(bb) (1 << (bb))
48
49/* Each n-bit range maps to up to n * 2 rules */
50#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2))
51
52/* Use the rest of mapping table buckets for rule indices, but it makes no sense
53 * to exceed 32 bits
54 */
55#if BITS_PER_LONG == 64
56#define NFT_PIPAPO_MAP_TOBITS 32
57#else
58#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS)
59#endif
60
61/* ...which gives us the highest allowed index for a rule */
62#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \
63 - (1UL << NFT_PIPAPO_MAP_NBITS))
64
65/* Definitions for vectorised implementations */
66#ifdef NFT_PIPAPO_ALIGN
67#define NFT_PIPAPO_ALIGN_HEADROOM \
68 (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN)
69#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN))
70#define NFT_PIPAPO_LT_ASSIGN(field, x) \
71 do { \
72 (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \
73 (field)->lt = (x); \
74 } while (0)
75#else
76#define NFT_PIPAPO_ALIGN_HEADROOM 0
77#define NFT_PIPAPO_LT_ALIGN(lt) (lt)
78#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x))
79#endif /* NFT_PIPAPO_ALIGN */
80
81#define nft_pipapo_for_each_field(field, index, match) \
82 for ((field) = (match)->f, (index) = 0; \
83 (index) < (match)->field_count; \
84 (index)++, (field)++)
85
86/**
87 * union nft_pipapo_map_bucket - Bucket of mapping table
88 * @to: First rule number (in next field) this rule maps to
89 * @n: Number of rules (in next field) this rule maps to
90 * @e: If there's no next field, pointer to element this rule maps to
91 */
92union nft_pipapo_map_bucket {
93 struct {
94#if BITS_PER_LONG == 64
95 static_assert(NFT_PIPAPO_MAP_TOBITS <= 32);
96 u32 to;
97
98 static_assert(NFT_PIPAPO_MAP_NBITS <= 32);
99 u32 n;
100#else
101 unsigned long to:NFT_PIPAPO_MAP_TOBITS;
102 unsigned long n:NFT_PIPAPO_MAP_NBITS;
103#endif
104 };
105 struct nft_pipapo_elem *e;
106};
107
108/**
109 * struct nft_pipapo_field - Lookup, mapping tables and related data for a field
110 * @groups: Amount of bit groups
111 * @rules: Number of inserted rules
112 * @bsize: Size of each bucket in lookup table, in longs
113 * @bb: Number of bits grouped together in lookup table buckets
114 * @lt: Lookup table: 'groups' rows of buckets
115 * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes
116 * @mt: Mapping table: one bucket per rule
117 */
118struct nft_pipapo_field {
119 int groups;
120 unsigned long rules;
121 size_t bsize;
122 int bb;
123#ifdef NFT_PIPAPO_ALIGN
124 unsigned long *lt_aligned;
125#endif
126 unsigned long *lt;
127 union nft_pipapo_map_bucket *mt;
128};
129
130/**
131 * struct nft_pipapo_match - Data used for lookup and matching
132 * @field_count Amount of fields in set
133 * @scratch: Preallocated per-CPU maps for partial matching results
134 * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
135 * @bsize_max: Maximum lookup table bucket size of all fields, in longs
136 * @rcu Matching data is swapped on commits
137 * @f: Fields, with lookup and mapping tables
138 */
139struct nft_pipapo_match {
140 int field_count;
141#ifdef NFT_PIPAPO_ALIGN
142 unsigned long * __percpu *scratch_aligned;
143#endif
144 unsigned long * __percpu *scratch;
145 size_t bsize_max;
146 struct rcu_head rcu;
147 struct nft_pipapo_field f[];
148};
149
150/**
151 * struct nft_pipapo - Representation of a set
152 * @match: Currently in-use matching data
153 * @clone: Copy where pending insertions and deletions are kept
154 * @width: Total bytes to be matched for one packet, including padding
155 * @dirty: Working copy has pending insertions or deletions
156 * @last_gc: Timestamp of last garbage collection run, jiffies
157 */
158struct nft_pipapo {
159 struct nft_pipapo_match __rcu *match;
160 struct nft_pipapo_match *clone;
161 int width;
162 bool dirty;
163 unsigned long last_gc;
164};
165
166struct nft_pipapo_elem;
167
168/**
169 * struct nft_pipapo_elem - API-facing representation of single set element
170 * @ext: nftables API extensions
171 */
172struct nft_pipapo_elem {
173 struct nft_set_ext ext;
174};
175
176int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
177 union nft_pipapo_map_bucket *mt, bool match_only);
178
179/**
180 * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
181 * @f: Field including lookup table
182 * @dst: Area to store result
183 * @data: Input data selecting table buckets
184 */
185static inline void pipapo_and_field_buckets_4bit(struct nft_pipapo_field *f,
186 unsigned long *dst,
187 const u8 *data)
188{
189 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
190 int group;
191
192 for (group = 0; group < f->groups; group += BITS_PER_BYTE / 4, data++) {
193 u8 v;
194
195 v = *data >> 4;
196 __bitmap_and(dst, dst, lt + v * f->bsize,
197 f->bsize * BITS_PER_LONG);
198 lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
199
200 v = *data & 0x0f;
201 __bitmap_and(dst, dst, lt + v * f->bsize,
202 f->bsize * BITS_PER_LONG);
203 lt += f->bsize * NFT_PIPAPO_BUCKETS(4);
204 }
205}
206
207/**
208 * pipapo_and_field_buckets_8bit() - Intersect 8-bit buckets
209 * @f: Field including lookup table
210 * @dst: Area to store result
211 * @data: Input data selecting table buckets
212 */
213static inline void pipapo_and_field_buckets_8bit(struct nft_pipapo_field *f,
214 unsigned long *dst,
215 const u8 *data)
216{
217 unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt);
218 int group;
219
220 for (group = 0; group < f->groups; group++, data++) {
221 __bitmap_and(dst, dst, lt + *data * f->bsize,
222 f->bsize * BITS_PER_LONG);
223 lt += f->bsize * NFT_PIPAPO_BUCKETS(8);
224 }
225}
226
227/**
228 * pipapo_estimate_size() - Estimate worst-case for set size
229 * @desc: Set description, element count and field description used here
230 *
231 * The size for this set type can vary dramatically, as it depends on the number
232 * of rules (composing netmasks) the entries expand to. We compute the worst
233 * case here.
234 *
235 * In general, for a non-ranged entry or a single composing netmask, we need
236 * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that
237 * is, each input bit needs four bits of matching data), plus a bucket in the
238 * mapping table for each field.
239 *
240 * Return: worst-case set size in bytes, 0 on any overflow
241 */
242static u64 pipapo_estimate_size(const struct nft_set_desc *desc)
243{
244 unsigned long entry_size;
245 u64 size;
246 int i;
247
248 for (i = 0, entry_size = 0; i < desc->field_count; i++) {
249 unsigned long rules;
250
251 if (desc->field_len[i] > NFT_PIPAPO_MAX_BYTES)
252 return 0;
253
254 /* Worst-case ranges for each concatenated field: each n-bit
255 * field can expand to up to n * 2 rules in each bucket, and
256 * each rule also needs a mapping bucket.
257 */
258 rules = ilog2(desc->field_len[i] * BITS_PER_BYTE) * 2;
259 entry_size += rules *
260 NFT_PIPAPO_BUCKETS(NFT_PIPAPO_GROUP_BITS_INIT) /
261 BITS_PER_BYTE;
262 entry_size += rules * sizeof(union nft_pipapo_map_bucket);
263 }
264
265 /* Rules in lookup and mapping tables are needed for each entry */
266 size = desc->size * entry_size;
267 if (size && div_u64(size, desc->size) != entry_size)
268 return 0;
269
270 size += sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match) * 2;
271
272 size += sizeof(struct nft_pipapo_field) * desc->field_count;
273
274 return size;
275}
276
277#endif /* _NFT_SET_PIPAPO_H */