Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
20a69341 PM |
2 | /* |
3 | * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> | |
4 | * | |
20a69341 PM |
5 | * Development of this code funded by Astaro AG (http://www.astaro.com/) |
6 | */ | |
7 | ||
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/sched/signal.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
20a69341 PM |
17 | |
/* Private data of an rbtree-backed set. */
struct nft_rbtree {
	/* root of the red-black tree holding the set elements */
	struct rb_root		root;
	/* taken for writing by insert/remove, for reading by walk, gc and
	 * the locked retry of lookup/get
	 */
	rwlock_t		lock;
	/* sequence counter tied to @lock; lets the lockless readers detect
	 * a concurrent tree update
	 */
	seqcount_rwlock_t	count;
	/* periodic garbage collection work, runs nft_rbtree_gc() */
	struct delayed_work	gc_work;
};
24 | ||
/* One set element as stored in the tree. */
struct nft_rbtree_elem {
	/* linkage into nft_rbtree::root */
	struct rb_node		node;
	/* element extensions (key, flags, ...); the set type reports
	 * offsetof(struct nft_rbtree_elem, ext) as its element size
	 */
	struct nft_set_ext	ext;
};
29 | ||
ef1d20e0 PNA |
30 | static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) |
31 | { | |
32 | return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) && | |
33 | (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); | |
34 | } | |
cc02e457 | 35 | |
6f7c9caf SB |
36 | static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) |
37 | { | |
38 | return !nft_rbtree_interval_end(rbe); | |
39 | } | |
40 | ||
c9e6978e PNA |
41 | static int nft_rbtree_cmp(const struct nft_set *set, |
42 | const struct nft_rbtree_elem *e1, | |
43 | const struct nft_rbtree_elem *e2) | |
e701001e | 44 | { |
c9e6978e PNA |
45 | return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), |
46 | set->klen); | |
e701001e PNA |
47 | } |
48 | ||
f6c383b8 PNA |
49 | static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) |
50 | { | |
51 | return nft_set_elem_expired(&rbe->ext) || | |
52 | nft_set_elem_is_dead(&rbe->ext); | |
53 | } | |
54 | ||
9b7e26ae FW |
/* Look up @key in the tree of @set on behalf of nft_rbtree_lookup().
 *
 * @seq is the sequence count sampled by the caller before the walk; the walk
 * gives up (returns false) as soon as the counter indicates a concurrent
 * update, so a false result must be re-validated by the caller.
 *
 * On a match *@ext points to the extensions of the matching element and true
 * is returned. For interval sets the match may be the closest start element
 * whose key is less than @key.
 */
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
				const u32 *key, const struct nft_set_ext **ext,
				unsigned int seq)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	const struct nft_rbtree_elem *rbe, *interval = NULL;
	u8 genmask = nft_genmask_cur(net);
	const struct rb_node *parent;
	int d;

	parent = rcu_dereference_raw(priv->root.rb_node);
	while (parent != NULL) {
		/* tree changed under us: abort, the caller decides on retry */
		if (read_seqcount_retry(&priv->count, seq))
			return false;

		rbe = rb_entry(parent, struct nft_rbtree_elem, node);

		d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
		if (d < 0) {
			/* node key is smaller than @key: larger keys live in
			 * the left subtree
			 */
			parent = rcu_dereference_raw(parent->rb_left);
			/* keep an already recorded start element when this
			 * node is an end element with the very same key
			 */
			if (interval &&
			    !nft_rbtree_cmp(set, rbe, interval) &&
			    nft_rbtree_interval_end(rbe) &&
			    nft_rbtree_interval_start(interval))
				continue;
			/* closest smaller element seen so far */
			interval = rbe;
		} else if (d > 0)
			parent = rcu_dereference_raw(parent->rb_right);
		else {
			/* exact key match, but not active in the current
			 * generation: keep searching to the left
			 */
			if (!nft_set_elem_active(&rbe->ext, genmask)) {
				parent = rcu_dereference_raw(parent->rb_left);
				continue;
			}

			if (nft_rbtree_elem_expired(rbe))
				return false;

			/* @key equals an end element: no match for anonymous
			 * sets; otherwise forget the candidate and continue
			 * to the left
			 */
			if (nft_rbtree_interval_end(rbe)) {
				if (nft_set_is_anonymous(set))
					return false;
				parent = rcu_dereference_raw(parent->rb_left);
				interval = NULL;
				continue;
			}

			*ext = &rbe->ext;
			return true;
		}
	}

	/* no exact match: for interval sets, accept the closest smaller
	 * element if it is an active, non-expired start element
	 */
	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
	    nft_set_elem_active(&interval->ext, genmask) &&
	    !nft_rbtree_elem_expired(interval) &&
	    nft_rbtree_interval_start(interval)) {
		*ext = &interval->ext;
		return true;
	}

	return false;
}
115 | ||
f227925e FW |
116 | INDIRECT_CALLABLE_SCOPE |
117 | bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, | |
118 | const u32 *key, const struct nft_set_ext **ext) | |
9b7e26ae FW |
119 | { |
120 | struct nft_rbtree *priv = nft_set_priv(set); | |
121 | unsigned int seq = read_seqcount_begin(&priv->count); | |
122 | bool ret; | |
123 | ||
124 | ret = __nft_rbtree_lookup(net, set, key, ext, seq); | |
125 | if (ret || !read_seqcount_retry(&priv->count, seq)) | |
126 | return ret; | |
127 | ||
128 | read_lock_bh(&priv->lock); | |
129 | seq = read_seqcount_begin(&priv->count); | |
130 | ret = __nft_rbtree_lookup(net, set, key, ext, seq); | |
131 | read_unlock_bh(&priv->lock); | |
132 | ||
133 | return ret; | |
134 | } | |
135 | ||
ba0e4d99 PNA |
/* Find the element for @key on behalf of nft_rbtree_get().
 *
 * @flags tells whether the caller asks for an interval end element
 * (NFT_SET_ELEM_INTERVAL_END set) or a start element. @seq is the sequence
 * count sampled by the caller; the walk returns false as soon as the counter
 * indicates a concurrent update.
 *
 * Returns true and stores the element in *@elem on success. *@elem is left
 * untouched when false is returned.
 */
static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
			     const u32 *key, struct nft_rbtree_elem **elem,
			     unsigned int seq, unsigned int flags, u8 genmask)
{
	struct nft_rbtree_elem *rbe, *interval = NULL;
	struct nft_rbtree *priv = nft_set_priv(set);
	const struct rb_node *parent;
	const void *this;
	int d;

	parent = rcu_dereference_raw(priv->root.rb_node);
	while (parent != NULL) {
		/* tree changed under us: abort, the caller decides on retry */
		if (read_seqcount_retry(&priv->count, seq))
			return false;

		rbe = rb_entry(parent, struct nft_rbtree_elem, node);

		this = nft_set_ext_key(&rbe->ext);
		d = memcmp(this, key, set->klen);
		if (d < 0) {
			parent = rcu_dereference_raw(parent->rb_left);
			/* looking for a start element: remember the closest
			 * element with a smaller key
			 */
			if (!(flags & NFT_SET_ELEM_INTERVAL_END))
				interval = rbe;
		} else if (d > 0) {
			parent = rcu_dereference_raw(parent->rb_right);
			/* looking for an end element: remember the closest
			 * element with a larger key
			 */
			if (flags & NFT_SET_ELEM_INTERVAL_END)
				interval = rbe;
		} else {
			/* same key but not active in @genmask: keep searching
			 * to the left
			 */
			if (!nft_set_elem_active(&rbe->ext, genmask)) {
				parent = rcu_dereference_raw(parent->rb_left);
				continue;
			}

			if (nft_set_elem_expired(&rbe->ext))
				return false;

			/* accept the element if it has no flags extension or
			 * its start/end type is the requested one
			 */
			if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
			    (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
			    (flags & NFT_SET_ELEM_INTERVAL_END)) {
				*elem = rbe;
				return true;
			}

			/* same key, other type: an end element invalidates the
			 * candidate recorded so far
			 */
			if (nft_rbtree_interval_end(rbe))
				interval = NULL;

			parent = rcu_dereference_raw(parent->rb_left);
		}
	}

	/* no exact match: for interval sets, fall back to the recorded
	 * candidate if it is active, not expired and of the requested type
	 */
	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
	    nft_set_elem_active(&interval->ext, genmask) &&
	    !nft_set_elem_expired(&interval->ext) &&
	    ((!nft_rbtree_interval_end(interval) &&
	      !(flags & NFT_SET_ELEM_INTERVAL_END)) ||
	     (nft_rbtree_interval_end(interval) &&
	      (flags & NFT_SET_ELEM_INTERVAL_END)))) {
		*elem = interval;
		return true;
	}

	return false;
}
199 | ||
200 | static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, | |
201 | const struct nft_set_elem *elem, unsigned int flags) | |
202 | { | |
203 | struct nft_rbtree *priv = nft_set_priv(set); | |
204 | unsigned int seq = read_seqcount_begin(&priv->count); | |
205 | struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); | |
206 | const u32 *key = (const u32 *)&elem->key.val; | |
207 | u8 genmask = nft_genmask_cur(net); | |
208 | bool ret; | |
209 | ||
210 | ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); | |
211 | if (ret || !read_seqcount_retry(&priv->count, seq)) | |
212 | return rbe; | |
213 | ||
214 | read_lock_bh(&priv->lock); | |
215 | seq = read_seqcount_begin(&priv->count); | |
216 | ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); | |
217 | if (!ret) | |
218 | rbe = ERR_PTR(-ENOENT); | |
219 | read_unlock_bh(&priv->lock); | |
220 | ||
221 | return rbe; | |
222 | } | |
223 | ||
f6c383b8 PNA |
224 | static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, |
225 | struct nft_rbtree *priv, | |
226 | struct nft_rbtree_elem *rbe) | |
227 | { | |
228 | struct nft_set_elem elem = { | |
229 | .priv = rbe, | |
230 | }; | |
231 | ||
232 | nft_setelem_data_deactivate(net, set, &elem); | |
233 | rb_erase(&rbe->node, &priv->root); | |
234 | } | |
235 | ||
c9e6978e PNA |
236 | static int nft_rbtree_gc_elem(const struct nft_set *__set, |
237 | struct nft_rbtree *priv, | |
f718863a FW |
238 | struct nft_rbtree_elem *rbe, |
239 | u8 genmask) | |
c9e6978e PNA |
240 | { |
241 | struct nft_set *set = (struct nft_set *)__set; | |
242 | struct rb_node *prev = rb_prev(&rbe->node); | |
f6c383b8 | 243 | struct net *net = read_pnet(&set->net); |
f718863a | 244 | struct nft_rbtree_elem *rbe_prev; |
f6c383b8 | 245 | struct nft_trans_gc *gc; |
c9e6978e | 246 | |
f6c383b8 PNA |
247 | gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC); |
248 | if (!gc) | |
c9e6978e PNA |
249 | return -ENOMEM; |
250 | ||
f718863a FW |
251 | /* search for end interval coming before this element. |
252 | * end intervals don't carry a timeout extension, they | |
253 | * are coupled with the interval start element. | |
254 | */ | |
61ae320a | 255 | while (prev) { |
c9e6978e | 256 | rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); |
f718863a FW |
257 | if (nft_rbtree_interval_end(rbe_prev) && |
258 | nft_set_elem_active(&rbe_prev->ext, genmask)) | |
c9e6978e PNA |
259 | break; |
260 | ||
261 | prev = rb_prev(prev); | |
61ae320a FW |
262 | } |
263 | ||
f718863a FW |
264 | if (prev) { |
265 | rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); | |
f6c383b8 | 266 | nft_rbtree_gc_remove(net, set, priv, rbe_prev); |
f718863a | 267 | |
f6c383b8 PNA |
268 | /* There is always room in this trans gc for this element, |
269 | * memory allocation never actually happens, hence, the warning | |
270 | * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT, | |
271 | * this is synchronous gc which never fails. | |
272 | */ | |
273 | gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); | |
274 | if (WARN_ON_ONCE(!gc)) | |
275 | return -ENOMEM; | |
276 | ||
277 | nft_trans_gc_elem_add(gc, rbe_prev); | |
61ae320a | 278 | } |
c9e6978e | 279 | |
f6c383b8 PNA |
280 | nft_rbtree_gc_remove(net, set, priv, rbe); |
281 | gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); | |
282 | if (WARN_ON_ONCE(!gc)) | |
283 | return -ENOMEM; | |
284 | ||
285 | nft_trans_gc_elem_add(gc, rbe); | |
c9e6978e | 286 | |
f6c383b8 | 287 | nft_trans_gc_queue_sync_done(gc); |
c9e6978e PNA |
288 | |
289 | return 0; | |
290 | } | |
291 | ||
292 | static bool nft_rbtree_update_first(const struct nft_set *set, | |
293 | struct nft_rbtree_elem *rbe, | |
294 | struct rb_node *first) | |
295 | { | |
296 | struct nft_rbtree_elem *first_elem; | |
297 | ||
298 | first_elem = rb_entry(first, struct nft_rbtree_elem, node); | |
299 | /* this element is closest to where the new element is to be inserted: | |
300 | * update the first element for the node list path. | |
301 | */ | |
302 | if (nft_rbtree_cmp(set, rbe, first_elem) < 0) | |
303 | return true; | |
304 | ||
305 | return false; | |
306 | } | |
307 | ||
42a55769 | 308 | static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, |
c016c7e4 PNA |
309 | struct nft_rbtree_elem *new, |
310 | struct nft_set_ext **ext) | |
20a69341 | 311 | { |
c9e6978e | 312 | struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; |
61ae320a | 313 | struct rb_node *node, *next, *parent, **p, *first = NULL; |
20a69341 | 314 | struct nft_rbtree *priv = nft_set_priv(set); |
2ee52ae9 | 315 | u8 cur_genmask = nft_genmask_cur(net); |
42a55769 | 316 | u8 genmask = nft_genmask_next(net); |
c9e6978e | 317 | int d, err; |
20a69341 | 318 | |
c9e6978e PNA |
319 | /* Descend the tree to search for an existing element greater than the |
320 | * key value to insert that is greater than the new element. This is the | |
321 | * first element to walk the ordered elements to find possible overlap. | |
7c84d414 | 322 | */ |
20a69341 PM |
323 | parent = NULL; |
324 | p = &priv->root.rb_node; | |
325 | while (*p != NULL) { | |
326 | parent = *p; | |
327 | rbe = rb_entry(parent, struct nft_rbtree_elem, node); | |
c9e6978e PNA |
328 | d = nft_rbtree_cmp(set, rbe, new); |
329 | ||
7c84d414 | 330 | if (d < 0) { |
20a69341 | 331 | p = &parent->rb_left; |
7c84d414 | 332 | } else if (d > 0) { |
c9e6978e PNA |
333 | if (!first || |
334 | nft_rbtree_update_first(set, rbe, first)) | |
335 | first = &rbe->node; | |
7c84d414 | 336 | |
c9e6978e | 337 | p = &parent->rb_right; |
7c84d414 | 338 | } else { |
c9e6978e | 339 | if (nft_rbtree_interval_end(rbe)) |
d2df92e9 | 340 | p = &parent->rb_left; |
c9e6978e | 341 | else |
d2df92e9 | 342 | p = &parent->rb_right; |
c9e6978e PNA |
343 | } |
344 | } | |
345 | ||
346 | if (!first) | |
347 | first = rb_first(&priv->root); | |
348 | ||
349 | /* Detect overlap by going through the list of valid tree nodes. | |
350 | * Values stored in the tree are in reversed order, starting from | |
351 | * highest to lowest value. | |
352 | */ | |
61ae320a FW |
353 | for (node = first; node != NULL; node = next) { |
354 | next = rb_next(node); | |
355 | ||
c9e6978e PNA |
356 | rbe = rb_entry(node, struct nft_rbtree_elem, node); |
357 | ||
358 | if (!nft_set_elem_active(&rbe->ext, genmask)) | |
359 | continue; | |
360 | ||
2ee52ae9 PNA |
361 | /* perform garbage collection to avoid bogus overlap reports |
362 | * but skip new elements in this transaction. | |
363 | */ | |
364 | if (nft_set_elem_expired(&rbe->ext) && | |
365 | nft_set_elem_active(&rbe->ext, cur_genmask)) { | |
f718863a | 366 | err = nft_rbtree_gc_elem(set, priv, rbe, genmask); |
c9e6978e PNA |
367 | if (err < 0) |
368 | return err; | |
7c84d414 | 369 | |
c9e6978e PNA |
370 | continue; |
371 | } | |
372 | ||
373 | d = nft_rbtree_cmp(set, rbe, new); | |
374 | if (d == 0) { | |
375 | /* Matching end element: no need to look for an | |
376 | * overlapping greater or equal element. | |
377 | */ | |
378 | if (nft_rbtree_interval_end(rbe)) { | |
379 | rbe_le = rbe; | |
380 | break; | |
381 | } | |
382 | ||
383 | /* first element that is greater or equal to key value. */ | |
384 | if (!rbe_ge) { | |
385 | rbe_ge = rbe; | |
386 | continue; | |
e701001e | 387 | } |
c9e6978e PNA |
388 | |
389 | /* this is a closer more or equal element, update it. */ | |
390 | if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { | |
391 | rbe_ge = rbe; | |
392 | continue; | |
393 | } | |
394 | ||
395 | /* element is equal to key value, make sure flags are | |
396 | * the same, an existing more or equal start element | |
397 | * must not be replaced by more or equal end element. | |
398 | */ | |
399 | if ((nft_rbtree_interval_start(new) && | |
400 | nft_rbtree_interval_start(rbe_ge)) || | |
401 | (nft_rbtree_interval_end(new) && | |
402 | nft_rbtree_interval_end(rbe_ge))) { | |
403 | rbe_ge = rbe; | |
404 | continue; | |
405 | } | |
406 | } else if (d > 0) { | |
407 | /* annotate element greater than the new element. */ | |
408 | rbe_ge = rbe; | |
409 | continue; | |
410 | } else if (d < 0) { | |
411 | /* annotate element less than the new element. */ | |
412 | rbe_le = rbe; | |
413 | break; | |
cc02e457 | 414 | } |
c9e6978e | 415 | } |
07267630 | 416 | |
c9e6978e PNA |
417 | /* - new start element matching existing start element: full overlap |
418 | * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. | |
419 | */ | |
420 | if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && | |
421 | nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { | |
422 | *ext = &rbe_ge->ext; | |
423 | return -EEXIST; | |
20a69341 | 424 | } |
7c84d414 | 425 | |
c9e6978e PNA |
426 | /* - new end element matching existing end element: full overlap |
427 | * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. | |
428 | */ | |
429 | if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && | |
430 | nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { | |
431 | *ext = &rbe_le->ext; | |
432 | return -EEXIST; | |
433 | } | |
434 | ||
435 | /* - new start element with existing closest, less or equal key value | |
436 | * being a start element: partial overlap, reported as -ENOTEMPTY. | |
437 | * Anonymous sets allow for two consecutive start element since they | |
438 | * are constant, skip them to avoid bogus overlap reports. | |
439 | */ | |
440 | if (!nft_set_is_anonymous(set) && rbe_le && | |
441 | nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) | |
7c84d414 SB |
442 | return -ENOTEMPTY; |
443 | ||
c9e6978e PNA |
444 | /* - new end element with existing closest, less or equal key value |
445 | * being a end element: partial overlap, reported as -ENOTEMPTY. | |
446 | */ | |
447 | if (rbe_le && | |
448 | nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) | |
449 | return -ENOTEMPTY; | |
450 | ||
451 | /* - new end element with existing closest, greater or equal key value | |
452 | * being an end element: partial overlap, reported as -ENOTEMPTY | |
453 | */ | |
454 | if (rbe_ge && | |
455 | nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) | |
456 | return -ENOTEMPTY; | |
457 | ||
458 | /* Accepted element: pick insertion point depending on key value */ | |
459 | parent = NULL; | |
460 | p = &priv->root.rb_node; | |
461 | while (*p != NULL) { | |
462 | parent = *p; | |
463 | rbe = rb_entry(parent, struct nft_rbtree_elem, node); | |
464 | d = nft_rbtree_cmp(set, rbe, new); | |
465 | ||
466 | if (d < 0) | |
467 | p = &parent->rb_left; | |
468 | else if (d > 0) | |
469 | p = &parent->rb_right; | |
470 | else if (nft_rbtree_interval_end(rbe)) | |
471 | p = &parent->rb_left; | |
472 | else | |
473 | p = &parent->rb_right; | |
474 | } | |
475 | ||
9b7e26ae | 476 | rb_link_node_rcu(&new->node, parent, p); |
20a69341 PM |
477 | rb_insert_color(&new->node, &priv->root); |
478 | return 0; | |
479 | } | |
480 | ||
42a55769 | 481 | static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, |
c016c7e4 PNA |
482 | const struct nft_set_elem *elem, |
483 | struct nft_set_ext **ext) | |
20a69341 | 484 | { |
03e5fd0e | 485 | struct nft_rbtree *priv = nft_set_priv(set); |
fe2811eb | 486 | struct nft_rbtree_elem *rbe = elem->priv; |
20a69341 PM |
487 | int err; |
488 | ||
03e5fd0e | 489 | write_lock_bh(&priv->lock); |
9b7e26ae | 490 | write_seqcount_begin(&priv->count); |
c016c7e4 | 491 | err = __nft_rbtree_insert(net, set, rbe, ext); |
9b7e26ae | 492 | write_seqcount_end(&priv->count); |
03e5fd0e | 493 | write_unlock_bh(&priv->lock); |
fe2811eb | 494 | |
20a69341 PM |
495 | return err; |
496 | } | |
497 | ||
5cb82a38 PNA |
498 | static void nft_rbtree_remove(const struct net *net, |
499 | const struct nft_set *set, | |
20a69341 PM |
500 | const struct nft_set_elem *elem) |
501 | { | |
502 | struct nft_rbtree *priv = nft_set_priv(set); | |
cc02e457 | 503 | struct nft_rbtree_elem *rbe = elem->priv; |
20a69341 | 504 | |
03e5fd0e | 505 | write_lock_bh(&priv->lock); |
9b7e26ae | 506 | write_seqcount_begin(&priv->count); |
20a69341 | 507 | rb_erase(&rbe->node, &priv->root); |
9b7e26ae | 508 | write_seqcount_end(&priv->count); |
03e5fd0e | 509 | write_unlock_bh(&priv->lock); |
20a69341 PM |
510 | } |
511 | ||
42a55769 PNA |
512 | static void nft_rbtree_activate(const struct net *net, |
513 | const struct nft_set *set, | |
cc02e457 PM |
514 | const struct nft_set_elem *elem) |
515 | { | |
516 | struct nft_rbtree_elem *rbe = elem->priv; | |
517 | ||
42a55769 | 518 | nft_set_elem_change_active(net, set, &rbe->ext); |
cc02e457 PM |
519 | } |
520 | ||
1ba1c414 PNA |
521 | static bool nft_rbtree_flush(const struct net *net, |
522 | const struct nft_set *set, void *priv) | |
37df5301 PNA |
523 | { |
524 | struct nft_rbtree_elem *rbe = priv; | |
525 | ||
f6c383b8 PNA |
526 | nft_set_elem_change_active(net, set, &rbe->ext); |
527 | ||
528 | return true; | |
37df5301 PNA |
529 | } |
530 | ||
42a55769 PNA |
531 | static void *nft_rbtree_deactivate(const struct net *net, |
532 | const struct nft_set *set, | |
cc02e457 | 533 | const struct nft_set_elem *elem) |
20a69341 PM |
534 | { |
535 | const struct nft_rbtree *priv = nft_set_priv(set); | |
536 | const struct rb_node *parent = priv->root.rb_node; | |
e701001e | 537 | struct nft_rbtree_elem *rbe, *this = elem->priv; |
42a55769 | 538 | u8 genmask = nft_genmask_next(net); |
20a69341 PM |
539 | int d; |
540 | ||
541 | while (parent != NULL) { | |
542 | rbe = rb_entry(parent, struct nft_rbtree_elem, node); | |
543 | ||
7d740264 PM |
544 | d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, |
545 | set->klen); | |
20a69341 PM |
546 | if (d < 0) |
547 | parent = parent->rb_left; | |
548 | else if (d > 0) | |
549 | parent = parent->rb_right; | |
550 | else { | |
e701001e | 551 | if (nft_rbtree_interval_end(rbe) && |
6f7c9caf | 552 | nft_rbtree_interval_start(this)) { |
e701001e PNA |
553 | parent = parent->rb_left; |
554 | continue; | |
6f7c9caf | 555 | } else if (nft_rbtree_interval_start(rbe) && |
e701001e PNA |
556 | nft_rbtree_interval_end(this)) { |
557 | parent = parent->rb_right; | |
558 | continue; | |
05b7639d PNA |
559 | } else if (!nft_set_elem_active(&rbe->ext, genmask)) { |
560 | parent = parent->rb_left; | |
561 | continue; | |
e701001e | 562 | } |
1ba1c414 | 563 | nft_rbtree_flush(net, set, rbe); |
cc02e457 | 564 | return rbe; |
20a69341 PM |
565 | } |
566 | } | |
cc02e457 | 567 | return NULL; |
20a69341 PM |
568 | } |
569 | ||
570 | static void nft_rbtree_walk(const struct nft_ctx *ctx, | |
de70185d | 571 | struct nft_set *set, |
20a69341 PM |
572 | struct nft_set_iter *iter) |
573 | { | |
03e5fd0e | 574 | struct nft_rbtree *priv = nft_set_priv(set); |
fe2811eb | 575 | struct nft_rbtree_elem *rbe; |
20a69341 PM |
576 | struct nft_set_elem elem; |
577 | struct rb_node *node; | |
578 | ||
03e5fd0e | 579 | read_lock_bh(&priv->lock); |
20a69341 | 580 | for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { |
cc02e457 PM |
581 | rbe = rb_entry(node, struct nft_rbtree_elem, node); |
582 | ||
20a69341 PM |
583 | if (iter->count < iter->skip) |
584 | goto cont; | |
8588ac09 | 585 | if (!nft_set_elem_active(&rbe->ext, iter->genmask)) |
cc02e457 | 586 | goto cont; |
20a69341 | 587 | |
fe2811eb | 588 | elem.priv = rbe; |
20a69341 PM |
589 | |
590 | iter->err = iter->fn(ctx, set, iter, &elem); | |
7632667d | 591 | if (iter->err < 0) { |
03e5fd0e | 592 | read_unlock_bh(&priv->lock); |
20a69341 | 593 | return; |
7632667d | 594 | } |
20a69341 PM |
595 | cont: |
596 | iter->count++; | |
597 | } | |
03e5fd0e | 598 | read_unlock_bh(&priv->lock); |
20a69341 PM |
599 | } |
600 | ||
8d8540c4 PNA |
/* Periodic garbage collection worker (nft_rbtree::gc_work).
 *
 * Walks the tree under the read lock, marks expired start elements and the
 * end element last seen before them as dead, and queues them on an
 * asynchronous gc transaction. The walk is abandoned when the gc sequence
 * number changes while it runs. The work always re-arms itself before
 * returning.
 */
static void nft_rbtree_gc(struct work_struct *work)
{
	struct nft_rbtree_elem *rbe, *rbe_end = NULL;
	struct nftables_pernet *nft_net;
	struct nft_rbtree *priv;
	struct nft_trans_gc *gc;
	struct rb_node *node;
	struct nft_set *set;
	unsigned int gc_seq;
	struct net *net;

	priv = container_of(work, struct nft_rbtree, gc_work.work);
	set = nft_set_container_of(priv);
	net = read_pnet(&set->net);
	nft_net = nft_pernet(net);
	/* snapshot used below to detect ruleset updates during the walk */
	gc_seq = READ_ONCE(nft_net->gc_seq);

	/* skip this run while gc is still reported pending for this set */
	if (nft_set_gc_is_pending(set))
		goto done;

	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
	if (!gc)
		goto done;

	read_lock_bh(&priv->lock);
	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {

		/* Ruleset has been updated, try later. */
		if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
			nft_trans_gc_destroy(gc);
			gc = NULL;
			goto try_later;
		}

		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		/* already marked dead: only needs to be queued */
		if (nft_set_elem_is_dead(&rbe->ext))
			goto dead_elem;

		/* elements are reversed in the rbtree for historical reasons,
		 * from highest to lowest value, that is why end element is
		 * always visited before the start element.
		 */
		if (nft_rbtree_interval_end(rbe)) {
			rbe_end = rbe;
			continue;
		}
		if (!nft_set_elem_expired(&rbe->ext))
			continue;

		nft_set_elem_dead(&rbe->ext);

		/* no end element recorded: the start element stays in the
		 * tree marked dead and is not queued in this iteration
		 */
		if (!rbe_end)
			continue;

		nft_set_elem_dead(&rbe_end->ext);

		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			goto try_later;

		nft_trans_gc_elem_add(gc, rbe_end);
		rbe_end = NULL;
dead_elem:
		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			goto try_later;

		nft_trans_gc_elem_add(gc, rbe);
	}

	gc = nft_trans_gc_catchall_async(gc, gc_seq);

try_later:
	read_unlock_bh(&priv->lock);

	/* gc is NULL here if the walk was abandoned or queueing failed */
	if (gc)
		nft_trans_gc_queue_async_done(gc);
done:
	/* re-arm the worker for the next gc interval */
	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
			   nft_set_gc_interval(set));
}
683 | ||
4ef360dd TY |
684 | static u64 nft_rbtree_privsize(const struct nlattr * const nla[], |
685 | const struct nft_set_desc *desc) | |
20a69341 PM |
686 | { |
687 | return sizeof(struct nft_rbtree); | |
688 | } | |
689 | ||
690 | static int nft_rbtree_init(const struct nft_set *set, | |
c50b960c | 691 | const struct nft_set_desc *desc, |
20a69341 PM |
692 | const struct nlattr * const nla[]) |
693 | { | |
694 | struct nft_rbtree *priv = nft_set_priv(set); | |
695 | ||
03e5fd0e | 696 | rwlock_init(&priv->lock); |
b901892b | 697 | seqcount_rwlock_init(&priv->count, &priv->lock); |
20a69341 | 698 | priv->root = RB_ROOT; |
8d8540c4 PNA |
699 | |
700 | INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc); | |
701 | if (set->flags & NFT_SET_TIMEOUT) | |
702 | queue_delayed_work(system_power_efficient_wq, &priv->gc_work, | |
703 | nft_set_gc_interval(set)); | |
704 | ||
20a69341 PM |
705 | return 0; |
706 | } | |
707 | ||
628bd3e4 PNA |
708 | static void nft_rbtree_destroy(const struct nft_ctx *ctx, |
709 | const struct nft_set *set) | |
20a69341 PM |
710 | { |
711 | struct nft_rbtree *priv = nft_set_priv(set); | |
712 | struct nft_rbtree_elem *rbe; | |
713 | struct rb_node *node; | |
714 | ||
8d8540c4 | 715 | cancel_delayed_work_sync(&priv->gc_work); |
c293ac95 | 716 | rcu_barrier(); |
20a69341 PM |
717 | while ((node = priv->root.rb_node) != NULL) { |
718 | rb_erase(node, &priv->root); | |
719 | rbe = rb_entry(node, struct nft_rbtree_elem, node); | |
628bd3e4 | 720 | nf_tables_set_elem_destroy(ctx, set, rbe); |
20a69341 PM |
721 | } |
722 | } | |
723 | ||
c50b960c PM |
724 | static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, |
725 | struct nft_set_estimate *est) | |
726 | { | |
f3a2181e SB |
727 | if (desc->field_count > 1) |
728 | return false; | |
729 | ||
c50b960c | 730 | if (desc->size) |
080ed636 PNA |
731 | est->size = sizeof(struct nft_rbtree) + |
732 | desc->size * sizeof(struct nft_rbtree_elem); | |
c50b960c | 733 | else |
080ed636 | 734 | est->size = ~0; |
c50b960c | 735 | |
55af753c | 736 | est->lookup = NFT_SET_CLASS_O_LOG_N; |
0b5a7874 | 737 | est->space = NFT_SET_CLASS_O_N; |
c50b960c PM |
738 | |
739 | return true; | |
740 | } | |
741 | ||
/* Set type descriptor of the rbtree backend: advertises interval, map,
 * object and timeout support and wires up the operations defined in this
 * file.
 */
const struct nft_set_type nft_set_rbtree_type = {
	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
	.ops		= {
		.privsize	= nft_rbtree_privsize,
		/* per-element overhead in front of the extension area */
		.elemsize	= offsetof(struct nft_rbtree_elem, ext),
		.estimate	= nft_rbtree_estimate,
		.init		= nft_rbtree_init,
		.destroy	= nft_rbtree_destroy,
		.insert		= nft_rbtree_insert,
		.remove		= nft_rbtree_remove,
		.deactivate	= nft_rbtree_deactivate,
		.flush		= nft_rbtree_flush,
		.activate	= nft_rbtree_activate,
		.lookup		= nft_rbtree_lookup,
		.walk		= nft_rbtree_walk,
		.get		= nft_rbtree_get,
	},
};