ASoC: mediatek: update MT2701 AFE documentation to adapt mfd device
[linux-2.6-block.git] / net / sched / cls_u32.c
1 /*
2  * net/sched/cls_u32.c  Ugly (or Universal) 32bit key Packet Classifier.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  *      The filters are packed to hash tables of key nodes
12  *      with a set of 32bit key/mask pairs at every node.
13  *      Nodes reference next level hash tables etc.
14  *
15  *      This scheme is the best universal classifier I managed to
16  *      invent; it is not super-fast, but it is not slow (provided you
17  *      program it correctly), and general enough.  And its relative
18  *      speed grows as the number of rules becomes larger.
19  *
20  *      It seems that it represents the best middle point between
21  *      speed and manageability both by human and by machine.
22  *
23  *      It is especially useful for link sharing combined with QoS;
24  *      pure RSVP doesn't need such a general approach and can use
25  *      much simpler (and faster) schemes, sort of cls_rsvp.c.
26  *
27  *      JHS: We should remove the CONFIG_NET_CLS_IND from here
28  *      eventually when the meta match extension is made available
29  *
30  *      nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
31  */
32
33 #include <linux/module.h>
34 #include <linux/slab.h>
35 #include <linux/types.h>
36 #include <linux/kernel.h>
37 #include <linux/string.h>
38 #include <linux/errno.h>
39 #include <linux/percpu.h>
40 #include <linux/rtnetlink.h>
41 #include <linux/skbuff.h>
42 #include <linux/bitmap.h>
43 #include <linux/netdevice.h>
44 #include <linux/hash.h>
45 #include <net/netlink.h>
46 #include <net/act_api.h>
47 #include <net/pkt_cls.h>
48 #include <linux/netdevice.h>
49 #include <linux/idr.h>
50
/* A key node: one filter entry living in a bucket chain of a tc_u_hnode.
 * The selector 'sel' carries a variable number of tc_u32_key entries that
 * are allocated directly behind the structure.
 */
51 struct tc_u_knode {
        /* next knode in the same bucket chain */
52         struct tc_u_knode __rcu *next;
        /* handle compared by u32_lookup_key(); TC_U32_HASH(handle) picks the bucket */
53         u32                     handle;
        /* hash table this knode is chained into (used by u32_delete_key()) */
54         struct tc_u_hnode __rcu *ht_up;
        /* extensions executed through tcf_exts_exec() on a terminal match */
55         struct tcf_exts         exts;
56 #ifdef CONFIG_NET_CLS_IND
        /* interface index checked with tcf_match_indev() */
57         int                     ifindex;
58 #endif
        /* right shift handed to u32_hash_fold() when selecting a bucket in ht_down */
59         u8                      fshift;
        /* result copied to the caller's tcf_result on a terminal match */
60         struct tcf_result       res;
        /* optional linked table the classifier descends into; holds a refcnt on it */
61         struct tc_u_hnode __rcu *ht_down;
62 #ifdef CONFIG_CLS_U32_PERF
        /* per-cpu counters (rcnt, rhit, kcnts[]) updated by u32_classify() */
63         struct tc_u32_pcnt __percpu *pf;
64 #endif
        /* TCA_CLS_FLAGS_* bits (tested with tc_skip_sw(), TCA_CLS_FLAGS_IN_HW) */
65         u32                     flags;
66 #ifdef CONFIG_CLS_U32_MARK
        /* knode is skipped unless (skb->mark & mask) == val */
67         u32                     val;
68         u32                     mask;
        /* per-cpu counter bumped each time the mark test passes */
69         u32 __percpu            *pcpu_success;
70 #endif
        /* owning tcf_proto, handed to u32_destroy_key() from the work handlers */
71         struct tcf_proto        *tp;
        /* deferred destruction: 'rcu' is used for call_rcu(), then the same
         * storage is reused as 'work' by the RCU callback.
         */
72         union {
73                 struct work_struct      work;
74                 struct rcu_head         rcu;
75         };
76         /* The 'sel' field MUST be the last field in structure to allow for
77          * tc_u32_keys allocated at end of structure.
78          */
79         struct tc_u32_sel       sel;
80 };
81
/* A hash table of key nodes. All tables of one tc_u_common are chained
 * through 'next'.
 */
82 struct tc_u_hnode {
        /* next table in tc_u_common::hlist */
83         struct tc_u_hnode __rcu *next;
        /* table handle, matched by u32_lookup_ht() */
84         u32                     handle;
        /* priority copied from tp->prio in u32_init(); reported to offload callbacks */
85         u32                     prio;
        /* shared per-block state this table belongs to */
86         struct tc_u_common      *tp_c;
        /* plain (non-atomic) reference count; also raised by knodes linking here via ht_down */
87         int                     refcnt;
        /* highest valid bucket index: loops run 0..divisor and the classifier
         * masks the folded hash with it.
         */
88         unsigned int            divisor;
        /* allocator for knode handles inside this table (see gen_new_kid()) */
89         struct idr              handle_idr;
        /* used by kfree_rcu() when the table is released */
90         struct rcu_head         rcu;
91         /* The 'ht' field MUST be the last field in structure to allow for
92          * more entries allocated at end of structure.
93          */
94         struct tc_u_knode __rcu *ht[1];
95 };
96
/* State shared by all u32 tcf_proto instances attached to the same
 * tcf_block; found through tc_u_common_hash.
 */
97 struct tc_u_common {
        /* head of the list of all hash tables (linked via tc_u_hnode::next) */
98         struct tc_u_hnode __rcu *hlist;
        /* block this state belongs to; compared in tc_u_common_find() */
99         struct tcf_block        *block;
        /* number of tcf_proto users; incremented in u32_init(), dropped in u32_destroy() */
100         int                     refcnt;
        /* allocator for hash table ids (see gen_new_htid()) */
101         struct idr              handle_idr;
        /* linkage into a tc_u_common_hash bucket */
102         struct hlist_node       hnode;
        /* NOTE(review): not used by the code visible in this chunk */
103         struct rcu_head         rcu;
104 };
105
106 static inline unsigned int u32_hash_fold(__be32 key,
107                                          const struct tc_u32_sel *sel,
108                                          u8 fshift)
109 {
110         unsigned int h = ntohl(key & sel->hmask) >> fshift;
111
112         return h;
113 }
114
/* Classify @skb against the u32 filter tree rooted at tp->root.
 *
 * Key nodes of the current bucket are tried in list order. A knode is
 * skipped when tc_skip_sw() is set on its flags, when (with
 * CONFIG_CLS_U32_MARK) the skb mark does not match, or when any of its
 * 32bit keys mismatches the packet. A fully matching knode without a
 * linked table is terminal if TC_U32_TERMINAL is set: its result is
 * copied to @res and the value of tcf_exts_exec() is returned when that
 * value is not negative. A matching knode with a linked table is pushed
 * on a small stack (at most TC_U32_MAXDEPTH entries) and the walk
 * continues in that table; when a chain is exhausted the walk pops back.
 *
 * Returns the non-negative tcf_exts_exec() result on a match, or -1 when
 * nothing matched, packet data could not be fetched, or the nesting
 * limit was exceeded.
 */
115 static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
116                         struct tcf_result *res)
117 {
        /* saved position for each level we descended through */
118         struct {
119                 struct tc_u_knode *knode;
120                 unsigned int      off;
121         } stack[TC_U32_MAXDEPTH];
122
123         struct tc_u_hnode *ht = rcu_dereference_bh(tp->root);
124         unsigned int off = skb_network_offset(skb);
125         struct tc_u_knode *n;
126         int sdepth = 0;
127         int off2 = 0;
128         int sel = 0;
129 #ifdef CONFIG_CLS_U32_PERF
130         int j;
131 #endif
132         int i, r;
133
134 next_ht:
135         n = rcu_dereference_bh(ht->ht[sel]);
136
137 next_knode:
138         if (n) {
139                 struct tc_u32_key *key = n->sel.keys;
140
141 #ifdef CONFIG_CLS_U32_PERF
142                 __this_cpu_inc(n->pf->rcnt);
143                 j = 0;
144 #endif
145
146                 if (tc_skip_sw(n->flags)) {
147                         n = rcu_dereference_bh(n->next);
148                         goto next_knode;
149                 }
150
151 #ifdef CONFIG_CLS_U32_MARK
152                 if ((skb->mark & n->mask) != n->val) {
153                         n = rcu_dereference_bh(n->next);
154                         goto next_knode;
155                 } else {
156                         __this_cpu_inc(*n->pcpu_success);
157                 }
158 #endif
159
                /* every key must match, otherwise move on to the next knode */
160                 for (i = n->sel.nkeys; i > 0; i--, key++) {
161                         int toff = off + key->off + (off2 & key->offmask);
162                         __be32 *data, hdata;
163
164                         if (skb_headroom(skb) + toff > INT_MAX)
165                                 goto out;
166
167                         data = skb_header_pointer(skb, toff, 4, &hdata);
168                         if (!data)
169                                 goto out;
170                         if ((*data ^ key->val) & key->mask) {
171                                 n = rcu_dereference_bh(n->next);
172                                 goto next_knode;
173                         }
174 #ifdef CONFIG_CLS_U32_PERF
175                         __this_cpu_inc(n->pf->kcnts[j]);
176                         j++;
177 #endif
178                 }
179
180                 ht = rcu_dereference_bh(n->ht_down);
181                 if (!ht) {
                        /* also entered from the POP path below */
182 check_terminal:
183                         if (n->sel.flags & TC_U32_TERMINAL) {
184
185                                 *res = n->res;
186 #ifdef CONFIG_NET_CLS_IND
187                                 if (!tcf_match_indev(skb, n->ifindex)) {
188                                         n = rcu_dereference_bh(n->next);
189                                         goto next_knode;
190                                 }
191 #endif
192 #ifdef CONFIG_CLS_U32_PERF
193                                 __this_cpu_inc(n->pf->rhit);
194 #endif
195                                 r = tcf_exts_exec(skb, &n->exts, res);
                                /* a negative result means: keep looking */
196                                 if (r < 0) {
197                                         n = rcu_dereference_bh(n->next);
198                                         goto next_knode;
199                                 }
200
201                                 return r;
202                         }
203                         n = rcu_dereference_bh(n->next);
204                         goto next_knode;
205                 }
206
207                 /* PUSH */
208                 if (sdepth >= TC_U32_MAXDEPTH)
209                         goto deadloop;
210                 stack[sdepth].knode = n;
211                 stack[sdepth].off = off;
212                 sdepth++;
213
214                 ht = rcu_dereference_bh(n->ht_down);
215                 sel = 0;
                /* pick the bucket in the linked table from packet data */
216                 if (ht->divisor) {
217                         __be32 *data, hdata;
218
219                         data = skb_header_pointer(skb, off + n->sel.hoff, 4,
220                                                   &hdata);
221                         if (!data)
222                                 goto out;
223                         sel = ht->divisor & u32_hash_fold(*data, &n->sel,
224                                                           n->fshift);
225                 }
226                 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
227                         goto next_ht;
228
229                 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
                        /* "+ 3" here and "&= ~3" below round off2 up to a multiple of 4 */
230                         off2 = n->sel.off + 3;
231                         if (n->sel.flags & TC_U32_VAROFFSET) {
232                                 __be16 *data, hdata;
233
234                                 data = skb_header_pointer(skb,
235                                                           off + n->sel.offoff,
236                                                           2, &hdata);
237                                 if (!data)
238                                         goto out;
239                                 off2 += ntohs(n->sel.offmask & *data) >>
240                                         n->sel.offshift;
241                         }
242                         off2 &= ~3;
243                 }
                /* TC_U32_EAT folds the computed offset into the base offset */
244                 if (n->sel.flags & TC_U32_EAT) {
245                         off += off2;
246                         off2 = 0;
247                 }
248
249                 if (off < skb->len)
250                         goto next_ht;
251         }
252
253         /* POP */
254         if (sdepth--) {
255                 n = stack[sdepth].knode;
256                 ht = rcu_dereference_bh(n->ht_up);
257                 off = stack[sdepth].off;
258                 goto check_terminal;
259         }
260 out:
261         return -1;
262
263 deadloop:
264         net_warn_ratelimited("cls_u32: dead loop\n");
265         return -1;
266 }
267
268 static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
269 {
270         struct tc_u_hnode *ht;
271
272         for (ht = rtnl_dereference(tp_c->hlist);
273              ht;
274              ht = rtnl_dereference(ht->next))
275                 if (ht->handle == handle)
276                         break;
277
278         return ht;
279 }
280
281 static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
282 {
283         unsigned int sel;
284         struct tc_u_knode *n = NULL;
285
286         sel = TC_U32_HASH(handle);
287         if (sel > ht->divisor)
288                 goto out;
289
290         for (n = rtnl_dereference(ht->ht[sel]);
291              n;
292              n = rtnl_dereference(n->next))
293                 if (n->handle == handle)
294                         break;
295 out:
296         return n;
297 }
298
299
300 static void *u32_get(struct tcf_proto *tp, u32 handle)
301 {
302         struct tc_u_hnode *ht;
303         struct tc_u_common *tp_c = tp->data;
304
305         if (TC_U32_HTID(handle) == TC_U32_ROOT)
306                 ht = rtnl_dereference(tp->root);
307         else
308                 ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
309
310         if (!ht)
311                 return NULL;
312
313         if (TC_U32_KEY(handle) == 0)
314                 return ht;
315
316         return u32_lookup_key(ht, handle);
317 }
318
319 static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
320 {
321         unsigned long idr_index;
322         int err;
323
324         /* This is only used inside rtnl lock it is safe to increment
325          * without read _copy_ update semantics
326          */
327         err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
328                             1, 0x7FF, GFP_KERNEL);
329         if (err)
330                 return 0;
331         return (u32)(idr_index | 0x800) << 20;
332 }
333
/* Global hash table of tc_u_common instances; the bucket is chosen by
 * hashing the tcf_block pointer (see tc_u_hash()). The array itself is
 * set up outside of the code visible here.
 */
334 static struct hlist_head *tc_u_common_hash;
335
/* Number of hash bits passed to hash_ptr(), and the resulting bucket count. */
336 #define U32_HASH_SHIFT 10
337 #define U32_HASH_SIZE (1 << U32_HASH_SHIFT)
338
339 static unsigned int tc_u_hash(const struct tcf_proto *tp)
340 {
341         return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
342 }
343
344 static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
345 {
346         struct tc_u_common *tc;
347         unsigned int h;
348
349         h = tc_u_hash(tp);
350         hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
351                 if (tc->block == tp->chain->block)
352                         return tc;
353         }
354         return NULL;
355 }
356
357 static int u32_init(struct tcf_proto *tp)
358 {
359         struct tc_u_hnode *root_ht;
360         struct tc_u_common *tp_c;
361         unsigned int h;
362
363         tp_c = tc_u_common_find(tp);
364
365         root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
366         if (root_ht == NULL)
367                 return -ENOBUFS;
368
369         root_ht->refcnt++;
370         root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
371         root_ht->prio = tp->prio;
372         idr_init(&root_ht->handle_idr);
373
374         if (tp_c == NULL) {
375                 tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
376                 if (tp_c == NULL) {
377                         kfree(root_ht);
378                         return -ENOBUFS;
379                 }
380                 tp_c->block = tp->chain->block;
381                 INIT_HLIST_NODE(&tp_c->hnode);
382                 idr_init(&tp_c->handle_idr);
383
384                 h = tc_u_hash(tp);
385                 hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
386         }
387
388         tp_c->refcnt++;
389         RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
390         rcu_assign_pointer(tp_c->hlist, root_ht);
391         root_ht->tp_c = tp_c;
392
393         rcu_assign_pointer(tp->root, root_ht);
394         tp->data = tp_c;
395         return 0;
396 }
397
398 static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
399                            bool free_pf)
400 {
401         tcf_exts_destroy(&n->exts);
402         tcf_exts_put_net(&n->exts);
403         if (n->ht_down)
404                 n->ht_down->refcnt--;
405 #ifdef CONFIG_CLS_U32_PERF
406         if (free_pf)
407                 free_percpu(n->pf);
408 #endif
409 #ifdef CONFIG_CLS_U32_MARK
410         if (free_pf)
411                 free_percpu(n->pcpu_success);
412 #endif
413         kfree(n);
414         return 0;
415 }
416
417 /* u32_delete_key_rcu should be called when free'ing a copied
418  * version of a tc_u_knode obtained from u32_init_knode(). When
419  * copies are obtained from u32_init_knode() the statistics are
420  * shared between the old and new copies to allow readers to
421  * continue to update the statistics during the copy. To support
422  * this the u32_delete_key_rcu variant does not free the percpu
423  * statistics.
424  */
425 static void u32_delete_key_work(struct work_struct *work)
426 {
427         struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
428
429         rtnl_lock();
430         u32_destroy_key(key->tp, key, false);
431         rtnl_unlock();
432 }
433
434 static void u32_delete_key_rcu(struct rcu_head *rcu)
435 {
436         struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
437
438         INIT_WORK(&key->work, u32_delete_key_work);
439         tcf_queue_work(&key->work);
440 }
441
442 /* u32_delete_key_freepf_rcu is the rcu callback variant
443  * that free's the entire structure including the statistics
444  * percpu variables. Only use this if the key is not a copy
445  * returned by u32_init_knode(). See u32_delete_key_rcu()
446  * for the variant that should be used with keys return from
447  * u32_init_knode()
448  */
449 static void u32_delete_key_freepf_work(struct work_struct *work)
450 {
451         struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
452
453         rtnl_lock();
454         u32_destroy_key(key->tp, key, true);
455         rtnl_unlock();
456 }
457
458 static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
459 {
460         struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
461
462         INIT_WORK(&key->work, u32_delete_key_freepf_work);
463         tcf_queue_work(&key->work);
464 }
465
466 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
467 {
468         struct tc_u_knode __rcu **kp;
469         struct tc_u_knode *pkp;
470         struct tc_u_hnode *ht = rtnl_dereference(key->ht_up);
471
472         if (ht) {
473                 kp = &ht->ht[TC_U32_HASH(key->handle)];
474                 for (pkp = rtnl_dereference(*kp); pkp;
475                      kp = &pkp->next, pkp = rtnl_dereference(*kp)) {
476                         if (pkp == key) {
477                                 RCU_INIT_POINTER(*kp, key->next);
478
479                                 tcf_unbind_filter(tp, &key->res);
480                                 tcf_exts_get_net(&key->exts);
481                                 call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
482                                 return 0;
483                         }
484                 }
485         }
486         WARN_ON(1);
487         return 0;
488 }
489
490 static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
491 {
492         struct tcf_block *block = tp->chain->block;
493         struct tc_cls_u32_offload cls_u32 = {};
494
495         tc_cls_common_offload_init(&cls_u32.common, tp);
496         cls_u32.command = TC_CLSU32_DELETE_HNODE;
497         cls_u32.hnode.divisor = h->divisor;
498         cls_u32.hnode.handle = h->handle;
499         cls_u32.hnode.prio = h->prio;
500
501         tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
502 }
503
504 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
505                                 u32 flags)
506 {
507         struct tcf_block *block = tp->chain->block;
508         struct tc_cls_u32_offload cls_u32 = {};
509         bool skip_sw = tc_skip_sw(flags);
510         bool offloaded = false;
511         int err;
512
513         tc_cls_common_offload_init(&cls_u32.common, tp);
514         cls_u32.command = TC_CLSU32_NEW_HNODE;
515         cls_u32.hnode.divisor = h->divisor;
516         cls_u32.hnode.handle = h->handle;
517         cls_u32.hnode.prio = h->prio;
518
519         err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
520         if (err < 0) {
521                 u32_clear_hw_hnode(tp, h);
522                 return err;
523         } else if (err > 0) {
524                 offloaded = true;
525         }
526
527         if (skip_sw && !offloaded)
528                 return -EINVAL;
529
530         return 0;
531 }
532
533 static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
534 {
535         struct tcf_block *block = tp->chain->block;
536         struct tc_cls_u32_offload cls_u32 = {};
537
538         tc_cls_common_offload_init(&cls_u32.common, tp);
539         cls_u32.command = TC_CLSU32_DELETE_KNODE;
540         cls_u32.knode.handle = handle;
541
542         tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
543 }
544
545 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
546                                 u32 flags)
547 {
548         struct tcf_block *block = tp->chain->block;
549         struct tc_cls_u32_offload cls_u32 = {};
550         bool skip_sw = tc_skip_sw(flags);
551         int err;
552
553         tc_cls_common_offload_init(&cls_u32.common, tp);
554         cls_u32.command = TC_CLSU32_REPLACE_KNODE;
555         cls_u32.knode.handle = n->handle;
556         cls_u32.knode.fshift = n->fshift;
557 #ifdef CONFIG_CLS_U32_MARK
558         cls_u32.knode.val = n->val;
559         cls_u32.knode.mask = n->mask;
560 #else
561         cls_u32.knode.val = 0;
562         cls_u32.knode.mask = 0;
563 #endif
564         cls_u32.knode.sel = &n->sel;
565         cls_u32.knode.exts = &n->exts;
566         if (n->ht_down)
567                 cls_u32.knode.link_handle = n->ht_down->handle;
568
569         err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
570         if (err < 0) {
571                 u32_remove_hw_knode(tp, n->handle);
572                 return err;
573         } else if (err > 0) {
574                 n->flags |= TCA_CLS_FLAGS_IN_HW;
575         }
576
577         if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
578                 return -EINVAL;
579
580         return 0;
581 }
582
583 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
584 {
585         struct tc_u_knode *n;
586         unsigned int h;
587
588         for (h = 0; h <= ht->divisor; h++) {
589                 while ((n = rtnl_dereference(ht->ht[h])) != NULL) {
590                         RCU_INIT_POINTER(ht->ht[h],
591                                          rtnl_dereference(n->next));
592                         tcf_unbind_filter(tp, &n->res);
593                         u32_remove_hw_knode(tp, n->handle);
594                         idr_remove_ext(&ht->handle_idr, n->handle);
595                         if (tcf_exts_get_net(&n->exts))
596                                 call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
597                         else
598                                 u32_destroy_key(n->tp, n, true);
599                 }
600         }
601 }
602
603 static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
604 {
605         struct tc_u_common *tp_c = tp->data;
606         struct tc_u_hnode __rcu **hn;
607         struct tc_u_hnode *phn;
608
609         WARN_ON(ht->refcnt);
610
611         u32_clear_hnode(tp, ht);
612
613         hn = &tp_c->hlist;
614         for (phn = rtnl_dereference(*hn);
615              phn;
616              hn = &phn->next, phn = rtnl_dereference(*hn)) {
617                 if (phn == ht) {
618                         u32_clear_hw_hnode(tp, ht);
619                         idr_destroy(&ht->handle_idr);
620                         idr_remove_ext(&tp_c->handle_idr, ht->handle);
621                         RCU_INIT_POINTER(*hn, ht->next);
622                         kfree_rcu(ht, rcu);
623                         return 0;
624                 }
625         }
626
627         return -ENOENT;
628 }
629
630 static bool ht_empty(struct tc_u_hnode *ht)
631 {
632         unsigned int h;
633
634         for (h = 0; h <= ht->divisor; h++)
635                 if (rcu_access_pointer(ht->ht[h]))
636                         return false;
637
638         return true;
639 }
640
641 static void u32_destroy(struct tcf_proto *tp)
642 {
643         struct tc_u_common *tp_c = tp->data;
644         struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
645
646         WARN_ON(root_ht == NULL);
647
648         if (root_ht && --root_ht->refcnt == 0)
649                 u32_destroy_hnode(tp, root_ht);
650
651         if (--tp_c->refcnt == 0) {
652                 struct tc_u_hnode *ht;
653
654                 hlist_del(&tp_c->hnode);
655
656                 for (ht = rtnl_dereference(tp_c->hlist);
657                      ht;
658                      ht = rtnl_dereference(ht->next)) {
659                         ht->refcnt--;
660                         u32_clear_hnode(tp, ht);
661                 }
662
663                 while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
664                         RCU_INIT_POINTER(tp_c->hlist, ht->next);
665                         kfree_rcu(ht, rcu);
666                 }
667
668                 idr_destroy(&tp_c->handle_idr);
669                 kfree(tp_c);
670         }
671
672         tp->data = NULL;
673 }
674
675 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
676 {
677         struct tc_u_hnode *ht = arg;
678         struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
679         struct tc_u_common *tp_c = tp->data;
680         int ret = 0;
681
682         if (ht == NULL)
683                 goto out;
684
685         if (TC_U32_KEY(ht->handle)) {
686                 u32_remove_hw_knode(tp, ht->handle);
687                 ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
688                 goto out;
689         }
690
691         if (root_ht == ht)
692                 return -EINVAL;
693
694         if (ht->refcnt == 1) {
695                 ht->refcnt--;
696                 u32_destroy_hnode(tp, ht);
697         } else {
698                 return -EBUSY;
699         }
700
701 out:
702         *last = true;
703         if (root_ht) {
704                 if (root_ht->refcnt > 1) {
705                         *last = false;
706                         goto ret;
707                 }
708                 if (root_ht->refcnt == 1) {
709                         if (!ht_empty(root_ht)) {
710                                 *last = false;
711                                 goto ret;
712                         }
713                 }
714         }
715
716         if (tp_c->refcnt > 1) {
717                 *last = false;
718                 goto ret;
719         }
720
721         if (tp_c->refcnt == 1) {
722                 struct tc_u_hnode *ht;
723
724                 for (ht = rtnl_dereference(tp_c->hlist);
725                      ht;
726                      ht = rtnl_dereference(ht->next))
727                         if (!ht_empty(ht)) {
728                                 *last = false;
729                                 break;
730                         }
731         }
732
733 ret:
734         return ret;
735 }
736
737 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
738 {
739         unsigned long idr_index;
740         u32 start = htid | 0x800;
741         u32 max = htid | 0xFFF;
742         u32 min = htid;
743
744         if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
745                           start, max + 1, GFP_KERNEL)) {
746                 if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
747                                   min + 1, max + 1, GFP_KERNEL))
748                         return max;
749         }
750
751         return (u32)idr_index;
752 }
753
/* Netlink attribute policy for the nested TCA_OPTIONS of a u32 filter;
 * handed to nla_parse_nested() by u32_change(). Entries with only .len
 * set (SEL, MARK) give the size of the fixed part of the respective
 * structure.
 */
754 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
755         [TCA_U32_CLASSID]       = { .type = NLA_U32 },
756         [TCA_U32_HASH]          = { .type = NLA_U32 },
757         [TCA_U32_LINK]          = { .type = NLA_U32 },
758         [TCA_U32_DIVISOR]       = { .type = NLA_U32 },
759         [TCA_U32_SEL]           = { .len = sizeof(struct tc_u32_sel) },
760         [TCA_U32_INDEV]         = { .type = NLA_STRING, .len = IFNAMSIZ },
761         [TCA_U32_MARK]          = { .len = sizeof(struct tc_u32_mark) },
762         [TCA_U32_FLAGS]         = { .type = NLA_U32 },
763 };
764
765 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
766                          unsigned long base, struct tc_u_hnode *ht,
767                          struct tc_u_knode *n, struct nlattr **tb,
768                          struct nlattr *est, bool ovr)
769 {
770         int err;
771
772         err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr);
773         if (err < 0)
774                 return err;
775
776         if (tb[TCA_U32_LINK]) {
777                 u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
778                 struct tc_u_hnode *ht_down = NULL, *ht_old;
779
780                 if (TC_U32_KEY(handle))
781                         return -EINVAL;
782
783                 if (handle) {
784                         ht_down = u32_lookup_ht(ht->tp_c, handle);
785
786                         if (ht_down == NULL)
787                                 return -EINVAL;
788                         ht_down->refcnt++;
789                 }
790
791                 ht_old = rtnl_dereference(n->ht_down);
792                 rcu_assign_pointer(n->ht_down, ht_down);
793
794                 if (ht_old)
795                         ht_old->refcnt--;
796         }
797         if (tb[TCA_U32_CLASSID]) {
798                 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
799                 tcf_bind_filter(tp, &n->res, base);
800         }
801
802 #ifdef CONFIG_NET_CLS_IND
803         if (tb[TCA_U32_INDEV]) {
804                 int ret;
805                 ret = tcf_change_indev(net, tb[TCA_U32_INDEV]);
806                 if (ret < 0)
807                         return -EINVAL;
808                 n->ifindex = ret;
809         }
810 #endif
811         return 0;
812 }
813
814 static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
815                               struct tc_u_knode *n)
816 {
817         struct tc_u_knode __rcu **ins;
818         struct tc_u_knode *pins;
819         struct tc_u_hnode *ht;
820
821         if (TC_U32_HTID(n->handle) == TC_U32_ROOT)
822                 ht = rtnl_dereference(tp->root);
823         else
824                 ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle));
825
826         ins = &ht->ht[TC_U32_HASH(n->handle)];
827
828         /* The node must always exist for it to be replaced if this is not the
829          * case then something went very wrong elsewhere.
830          */
831         for (pins = rtnl_dereference(*ins); ;
832              ins = &pins->next, pins = rtnl_dereference(*ins))
833                 if (pins->handle == n->handle)
834                         break;
835
836         idr_replace_ext(&ht->handle_idr, n, n->handle);
837         RCU_INIT_POINTER(n->next, pins->next);
838         rcu_assign_pointer(*ins, n);
839 }
840
841 static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
842                                          struct tc_u_knode *n)
843 {
844         struct tc_u_knode *new;
845         struct tc_u32_sel *s = &n->sel;
846
847         new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key),
848                       GFP_KERNEL);
849
850         if (!new)
851                 return NULL;
852
853         RCU_INIT_POINTER(new->next, n->next);
854         new->handle = n->handle;
855         RCU_INIT_POINTER(new->ht_up, n->ht_up);
856
857 #ifdef CONFIG_NET_CLS_IND
858         new->ifindex = n->ifindex;
859 #endif
860         new->fshift = n->fshift;
861         new->res = n->res;
862         new->flags = n->flags;
863         RCU_INIT_POINTER(new->ht_down, n->ht_down);
864
865         /* bump reference count as long as we hold pointer to structure */
866         if (new->ht_down)
867                 new->ht_down->refcnt++;
868
869 #ifdef CONFIG_CLS_U32_PERF
870         /* Statistics may be incremented by readers during update
871          * so we must keep them in tact. When the node is later destroyed
872          * a special destroy call must be made to not free the pf memory.
873          */
874         new->pf = n->pf;
875 #endif
876
877 #ifdef CONFIG_CLS_U32_MARK
878         new->val = n->val;
879         new->mask = n->mask;
880         /* Similarly success statistics must be moved as pointers */
881         new->pcpu_success = n->pcpu_success;
882 #endif
883         new->tp = tp;
884         memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
885
886         if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
887                 kfree(new);
888                 return NULL;
889         }
890
891         return new;
892 }
893
894 static int u32_change(struct net *net, struct sk_buff *in_skb,
895                       struct tcf_proto *tp, unsigned long base, u32 handle,
896                       struct nlattr **tca, void **arg, bool ovr)
897 {
898         struct tc_u_common *tp_c = tp->data;
899         struct tc_u_hnode *ht;
900         struct tc_u_knode *n;
901         struct tc_u32_sel *s;
902         struct nlattr *opt = tca[TCA_OPTIONS];
903         struct nlattr *tb[TCA_U32_MAX + 1];
904         u32 htid, flags = 0;
905         int err;
906 #ifdef CONFIG_CLS_U32_PERF
907         size_t size;
908 #endif
909
910         if (opt == NULL)
911                 return handle ? -EINVAL : 0;
912
913         err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy, NULL);
914         if (err < 0)
915                 return err;
916
917         if (tb[TCA_U32_FLAGS]) {
918                 flags = nla_get_u32(tb[TCA_U32_FLAGS]);
919                 if (!tc_flags_valid(flags))
920                         return -EINVAL;
921         }
922
923         n = *arg;
924         if (n) {
925                 struct tc_u_knode *new;
926
927                 if (TC_U32_KEY(n->handle) == 0)
928                         return -EINVAL;
929
930                 if (n->flags != flags)
931                         return -EINVAL;
932
933                 new = u32_init_knode(tp, n);
934                 if (!new)
935                         return -ENOMEM;
936
937                 err = u32_set_parms(net, tp, base,
938                                     rtnl_dereference(n->ht_up), new, tb,
939                                     tca[TCA_RATE], ovr);
940
941                 if (err) {
942                         u32_destroy_key(tp, new, false);
943                         return err;
944                 }
945
946                 err = u32_replace_hw_knode(tp, new, flags);
947                 if (err) {
948                         u32_destroy_key(tp, new, false);
949                         return err;
950                 }
951
952                 if (!tc_in_hw(new->flags))
953                         new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
954
955                 u32_replace_knode(tp, tp_c, new);
956                 tcf_unbind_filter(tp, &n->res);
957                 tcf_exts_get_net(&n->exts);
958                 call_rcu(&n->rcu, u32_delete_key_rcu);
959                 return 0;
960         }
961
962         if (tb[TCA_U32_DIVISOR]) {
963                 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
964
965                 if (--divisor > 0x100)
966                         return -EINVAL;
967                 if (TC_U32_KEY(handle))
968                         return -EINVAL;
969                 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
970                 if (ht == NULL)
971                         return -ENOBUFS;
972                 if (handle == 0) {
973                         handle = gen_new_htid(tp->data, ht);
974                         if (handle == 0) {
975                                 kfree(ht);
976                                 return -ENOMEM;
977                         }
978                 } else {
979                         err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
980                                             handle, handle + 1, GFP_KERNEL);
981                         if (err) {
982                                 kfree(ht);
983                                 return err;
984                         }
985                 }
986                 ht->tp_c = tp_c;
987                 ht->refcnt = 1;
988                 ht->divisor = divisor;
989                 ht->handle = handle;
990                 ht->prio = tp->prio;
991                 idr_init(&ht->handle_idr);
992
993                 err = u32_replace_hw_hnode(tp, ht, flags);
994                 if (err) {
995                         idr_remove_ext(&tp_c->handle_idr, handle);
996                         kfree(ht);
997                         return err;
998                 }
999
1000                 RCU_INIT_POINTER(ht->next, tp_c->hlist);
1001                 rcu_assign_pointer(tp_c->hlist, ht);
1002                 *arg = ht;
1003
1004                 return 0;
1005         }
1006
1007         if (tb[TCA_U32_HASH]) {
1008                 htid = nla_get_u32(tb[TCA_U32_HASH]);
1009                 if (TC_U32_HTID(htid) == TC_U32_ROOT) {
1010                         ht = rtnl_dereference(tp->root);
1011                         htid = ht->handle;
1012                 } else {
1013                         ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
1014                         if (ht == NULL)
1015                                 return -EINVAL;
1016                 }
1017         } else {
1018                 ht = rtnl_dereference(tp->root);
1019                 htid = ht->handle;
1020         }
1021
1022         if (ht->divisor < TC_U32_HASH(htid))
1023                 return -EINVAL;
1024
1025         if (handle) {
1026                 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
1027                         return -EINVAL;
1028                 handle = htid | TC_U32_NODE(handle);
1029                 err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
1030                                     handle, handle + 1,
1031                                     GFP_KERNEL);
1032                 if (err)
1033                         return err;
1034         } else
1035                 handle = gen_new_kid(ht, htid);
1036
1037         if (tb[TCA_U32_SEL] == NULL) {
1038                 err = -EINVAL;
1039                 goto erridr;
1040         }
1041
1042         s = nla_data(tb[TCA_U32_SEL]);
1043
1044         n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
1045         if (n == NULL) {
1046                 err = -ENOBUFS;
1047                 goto erridr;
1048         }
1049
1050 #ifdef CONFIG_CLS_U32_PERF
1051         size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
1052         n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
1053         if (!n->pf) {
1054                 err = -ENOBUFS;
1055                 goto errfree;
1056         }
1057 #endif
1058
1059         memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
1060         RCU_INIT_POINTER(n->ht_up, ht);
1061         n->handle = handle;
1062         n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
1063         n->flags = flags;
1064         n->tp = tp;
1065
1066         err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
1067         if (err < 0)
1068                 goto errout;
1069
1070 #ifdef CONFIG_CLS_U32_MARK
1071         n->pcpu_success = alloc_percpu(u32);
1072         if (!n->pcpu_success) {
1073                 err = -ENOMEM;
1074                 goto errout;
1075         }
1076
1077         if (tb[TCA_U32_MARK]) {
1078                 struct tc_u32_mark *mark;
1079
1080                 mark = nla_data(tb[TCA_U32_MARK]);
1081                 n->val = mark->val;
1082                 n->mask = mark->mask;
1083         }
1084 #endif
1085
1086         err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr);
1087         if (err == 0) {
1088                 struct tc_u_knode __rcu **ins;
1089                 struct tc_u_knode *pins;
1090
1091                 err = u32_replace_hw_knode(tp, n, flags);
1092                 if (err)
1093                         goto errhw;
1094
1095                 if (!tc_in_hw(n->flags))
1096                         n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
1097
1098                 ins = &ht->ht[TC_U32_HASH(handle)];
1099                 for (pins = rtnl_dereference(*ins); pins;
1100                      ins = &pins->next, pins = rtnl_dereference(*ins))
1101                         if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle))
1102                                 break;
1103
1104                 RCU_INIT_POINTER(n->next, pins);
1105                 rcu_assign_pointer(*ins, n);
1106                 *arg = n;
1107                 return 0;
1108         }
1109
1110 errhw:
1111 #ifdef CONFIG_CLS_U32_MARK
1112         free_percpu(n->pcpu_success);
1113 #endif
1114
1115 errout:
1116         tcf_exts_destroy(&n->exts);
1117 #ifdef CONFIG_CLS_U32_PERF
1118 errfree:
1119         free_percpu(n->pf);
1120 #endif
1121         kfree(n);
1122 erridr:
1123         idr_remove_ext(&ht->handle_idr, handle);
1124         return err;
1125 }
1126
1127 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
1128 {
1129         struct tc_u_common *tp_c = tp->data;
1130         struct tc_u_hnode *ht;
1131         struct tc_u_knode *n;
1132         unsigned int h;
1133
1134         if (arg->stop)
1135                 return;
1136
1137         for (ht = rtnl_dereference(tp_c->hlist);
1138              ht;
1139              ht = rtnl_dereference(ht->next)) {
1140                 if (ht->prio != tp->prio)
1141                         continue;
1142                 if (arg->count >= arg->skip) {
1143                         if (arg->fn(tp, ht, arg) < 0) {
1144                                 arg->stop = 1;
1145                                 return;
1146                         }
1147                 }
1148                 arg->count++;
1149                 for (h = 0; h <= ht->divisor; h++) {
1150                         for (n = rtnl_dereference(ht->ht[h]);
1151                              n;
1152                              n = rtnl_dereference(n->next)) {
1153                                 if (arg->count < arg->skip) {
1154                                         arg->count++;
1155                                         continue;
1156                                 }
1157                                 if (arg->fn(tp, n, arg) < 0) {
1158                                         arg->stop = 1;
1159                                         return;
1160                                 }
1161                                 arg->count++;
1162                         }
1163                 }
1164         }
1165 }
1166
1167 static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
1168 {
1169         struct tc_u_knode *n = fh;
1170
1171         if (n && n->res.classid == classid)
1172                 n->res.class = cl;
1173 }
1174
1175 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
1176                     struct sk_buff *skb, struct tcmsg *t)
1177 {
1178         struct tc_u_knode *n = fh;
1179         struct tc_u_hnode *ht_up, *ht_down;
1180         struct nlattr *nest;
1181
1182         if (n == NULL)
1183                 return skb->len;
1184
1185         t->tcm_handle = n->handle;
1186
1187         nest = nla_nest_start(skb, TCA_OPTIONS);
1188         if (nest == NULL)
1189                 goto nla_put_failure;
1190
1191         if (TC_U32_KEY(n->handle) == 0) {
1192                 struct tc_u_hnode *ht = fh;
1193                 u32 divisor = ht->divisor + 1;
1194
1195                 if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
1196                         goto nla_put_failure;
1197         } else {
1198 #ifdef CONFIG_CLS_U32_PERF
1199                 struct tc_u32_pcnt *gpf;
1200                 int cpu;
1201 #endif
1202
1203                 if (nla_put(skb, TCA_U32_SEL,
1204                             sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
1205                             &n->sel))
1206                         goto nla_put_failure;
1207
1208                 ht_up = rtnl_dereference(n->ht_up);
1209                 if (ht_up) {
1210                         u32 htid = n->handle & 0xFFFFF000;
1211                         if (nla_put_u32(skb, TCA_U32_HASH, htid))
1212                                 goto nla_put_failure;
1213                 }
1214                 if (n->res.classid &&
1215                     nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
1216                         goto nla_put_failure;
1217
1218                 ht_down = rtnl_dereference(n->ht_down);
1219                 if (ht_down &&
1220                     nla_put_u32(skb, TCA_U32_LINK, ht_down->handle))
1221                         goto nla_put_failure;
1222
1223                 if (n->flags && nla_put_u32(skb, TCA_U32_FLAGS, n->flags))
1224                         goto nla_put_failure;
1225
1226 #ifdef CONFIG_CLS_U32_MARK
1227                 if ((n->val || n->mask)) {
1228                         struct tc_u32_mark mark = {.val = n->val,
1229                                                    .mask = n->mask,
1230                                                    .success = 0};
1231                         int cpum;
1232
1233                         for_each_possible_cpu(cpum) {
1234                                 __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum);
1235
1236                                 mark.success += cnt;
1237                         }
1238
1239                         if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark))
1240                                 goto nla_put_failure;
1241                 }
1242 #endif
1243
1244                 if (tcf_exts_dump(skb, &n->exts) < 0)
1245                         goto nla_put_failure;
1246
1247 #ifdef CONFIG_NET_CLS_IND
1248                 if (n->ifindex) {
1249                         struct net_device *dev;
1250                         dev = __dev_get_by_index(net, n->ifindex);
1251                         if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
1252                                 goto nla_put_failure;
1253                 }
1254 #endif
1255 #ifdef CONFIG_CLS_U32_PERF
1256                 gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
1257                               n->sel.nkeys * sizeof(u64),
1258                               GFP_KERNEL);
1259                 if (!gpf)
1260                         goto nla_put_failure;
1261
1262                 for_each_possible_cpu(cpu) {
1263                         int i;
1264                         struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu);
1265
1266                         gpf->rcnt += pf->rcnt;
1267                         gpf->rhit += pf->rhit;
1268                         for (i = 0; i < n->sel.nkeys; i++)
1269                                 gpf->kcnts[i] += pf->kcnts[i];
1270                 }
1271
1272                 if (nla_put_64bit(skb, TCA_U32_PCNT,
1273                                   sizeof(struct tc_u32_pcnt) +
1274                                   n->sel.nkeys * sizeof(u64),
1275                                   gpf, TCA_U32_PAD)) {
1276                         kfree(gpf);
1277                         goto nla_put_failure;
1278                 }
1279                 kfree(gpf);
1280 #endif
1281         }
1282
1283         nla_nest_end(skb, nest);
1284
1285         if (TC_U32_KEY(n->handle))
1286                 if (tcf_exts_dump_stats(skb, &n->exts) < 0)
1287                         goto nla_put_failure;
1288         return skb->len;
1289
1290 nla_put_failure:
1291         nla_nest_cancel(skb, nest);
1292         return -1;
1293 }
1294
1295 static struct tcf_proto_ops cls_u32_ops __read_mostly = {
1296         .kind           =       "u32",
1297         .classify       =       u32_classify,
1298         .init           =       u32_init,
1299         .destroy        =       u32_destroy,
1300         .get            =       u32_get,
1301         .change         =       u32_change,
1302         .delete         =       u32_delete,
1303         .walk           =       u32_walk,
1304         .dump           =       u32_dump,
1305         .bind_class     =       u32_bind_class,
1306         .owner          =       THIS_MODULE,
1307 };
1308
1309 static int __init init_u32(void)
1310 {
1311         int i, ret;
1312
1313         pr_info("u32 classifier\n");
1314 #ifdef CONFIG_CLS_U32_PERF
1315         pr_info("    Performance counters on\n");
1316 #endif
1317 #ifdef CONFIG_NET_CLS_IND
1318         pr_info("    input device check on\n");
1319 #endif
1320 #ifdef CONFIG_NET_CLS_ACT
1321         pr_info("    Actions configured\n");
1322 #endif
1323         tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE,
1324                                           sizeof(struct hlist_head),
1325                                           GFP_KERNEL);
1326         if (!tc_u_common_hash)
1327                 return -ENOMEM;
1328
1329         for (i = 0; i < U32_HASH_SIZE; i++)
1330                 INIT_HLIST_HEAD(&tc_u_common_hash[i]);
1331
1332         ret = register_tcf_proto_ops(&cls_u32_ops);
1333         if (ret)
1334                 kvfree(tc_u_common_hash);
1335         return ret;
1336 }
1337
1338 static void __exit exit_u32(void)
1339 {
1340         unregister_tcf_proto_ops(&cls_u32_ops);
1341         kvfree(tc_u_common_hash);
1342 }
1343
1344 module_init(init_u32)
1345 module_exit(exit_u32)
1346 MODULE_LICENSE("GPL");