Merge tag 'perf-tools-fixes-for-v6.4-1-2023-05-20' of git://git.kernel.org/pub/scm...
[linux-block.git] / net / sched / act_skbedit.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2008, Intel Corporation.
4  *
5  * Author: Alexander Duyck <alexander.h.duyck@intel.com>
6  */
7
8 #include <linux/module.h>
9 #include <linux/init.h>
10 #include <linux/kernel.h>
11 #include <linux/skbuff.h>
12 #include <linux/rtnetlink.h>
13 #include <net/netlink.h>
14 #include <net/pkt_sched.h>
15 #include <net/ip.h>
16 #include <net/ipv6.h>
17 #include <net/dsfield.h>
18 #include <net/pkt_cls.h>
19 #include <net/tc_wrapper.h>
20
21 #include <linux/tc_act/tc_skbedit.h>
22 #include <net/tc_act/tc_skbedit.h>
23
24 static struct tc_action_ops act_skbedit_ops;
25
26 static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
27                             struct sk_buff *skb)
28 {
29         u16 queue_mapping = params->queue_mapping;
30
31         if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
32                 u32 hash = skb_get_hash(skb);
33
34                 queue_mapping += hash % params->mapping_mod;
35         }
36
37         return netdev_cap_txqueue(skb->dev, queue_mapping);
38 }
39
40 TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb,
41                                       const struct tc_action *a,
42                                       struct tcf_result *res)
43 {
44         struct tcf_skbedit *d = to_skbedit(a);
45         struct tcf_skbedit_params *params;
46         int action;
47
48         tcf_lastuse_update(&d->tcf_tm);
49         bstats_update(this_cpu_ptr(d->common.cpu_bstats), skb);
50
51         params = rcu_dereference_bh(d->params);
52         action = READ_ONCE(d->tcf_action);
53
54         if (params->flags & SKBEDIT_F_PRIORITY)
55                 skb->priority = params->priority;
56         if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
57                 int wlen = skb_network_offset(skb);
58
59                 switch (skb_protocol(skb, true)) {
60                 case htons(ETH_P_IP):
61                         wlen += sizeof(struct iphdr);
62                         if (!pskb_may_pull(skb, wlen))
63                                 goto err;
64                         skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
65                         break;
66
67                 case htons(ETH_P_IPV6):
68                         wlen += sizeof(struct ipv6hdr);
69                         if (!pskb_may_pull(skb, wlen))
70                                 goto err;
71                         skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
72                         break;
73                 }
74         }
75         if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
76             skb->dev->real_num_tx_queues > params->queue_mapping) {
77 #ifdef CONFIG_NET_EGRESS
78                 netdev_xmit_skip_txqueue(true);
79 #endif
80                 skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
81         }
82         if (params->flags & SKBEDIT_F_MARK) {
83                 skb->mark &= ~params->mask;
84                 skb->mark |= params->mark & params->mask;
85         }
86         if (params->flags & SKBEDIT_F_PTYPE)
87                 skb->pkt_type = params->ptype;
88         return action;
89
90 err:
91         qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
92         return TC_ACT_SHOT;
93 }
94
95 static void tcf_skbedit_stats_update(struct tc_action *a, u64 bytes,
96                                      u64 packets, u64 drops,
97                                      u64 lastuse, bool hw)
98 {
99         struct tcf_skbedit *d = to_skbedit(a);
100         struct tcf_t *tm = &d->tcf_tm;
101
102         tcf_action_update_stats(a, bytes, packets, drops, hw);
103         tm->lastuse = max_t(u64, tm->lastuse, lastuse);
104 }
105
106 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
107         [TCA_SKBEDIT_PARMS]             = { .len = sizeof(struct tc_skbedit) },
108         [TCA_SKBEDIT_PRIORITY]          = { .len = sizeof(u32) },
109         [TCA_SKBEDIT_QUEUE_MAPPING]     = { .len = sizeof(u16) },
110         [TCA_SKBEDIT_MARK]              = { .len = sizeof(u32) },
111         [TCA_SKBEDIT_PTYPE]             = { .len = sizeof(u16) },
112         [TCA_SKBEDIT_MASK]              = { .len = sizeof(u32) },
113         [TCA_SKBEDIT_FLAGS]             = { .len = sizeof(u64) },
114         [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) },
115 };
116
117 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
118                             struct nlattr *est, struct tc_action **a,
119                             struct tcf_proto *tp, u32 act_flags,
120                             struct netlink_ext_ack *extack)
121 {
122         struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id);
123         bool bind = act_flags & TCA_ACT_FLAGS_BIND;
124         struct tcf_skbedit_params *params_new;
125         struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
126         struct tcf_chain *goto_ch = NULL;
127         struct tc_skbedit *parm;
128         struct tcf_skbedit *d;
129         u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
130         u16 *queue_mapping = NULL, *ptype = NULL;
131         u16 mapping_mod = 1;
132         bool exists = false;
133         int ret = 0, err;
134         u32 index;
135
136         if (nla == NULL)
137                 return -EINVAL;
138
139         err = nla_parse_nested_deprecated(tb, TCA_SKBEDIT_MAX, nla,
140                                           skbedit_policy, NULL);
141         if (err < 0)
142                 return err;
143
144         if (tb[TCA_SKBEDIT_PARMS] == NULL)
145                 return -EINVAL;
146
147         if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
148                 flags |= SKBEDIT_F_PRIORITY;
149                 priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
150         }
151
152         if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
153                 if (is_tcf_skbedit_ingress(act_flags) &&
154                     !(act_flags & TCA_ACT_FLAGS_SKIP_SW)) {
155                         NL_SET_ERR_MSG_MOD(extack, "\"queue_mapping\" option on receive side is hardware only, use skip_sw");
156                         return -EOPNOTSUPP;
157                 }
158                 flags |= SKBEDIT_F_QUEUE_MAPPING;
159                 queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
160         }
161
162         if (tb[TCA_SKBEDIT_PTYPE] != NULL) {
163                 ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]);
164                 if (!skb_pkt_type_ok(*ptype))
165                         return -EINVAL;
166                 flags |= SKBEDIT_F_PTYPE;
167         }
168
169         if (tb[TCA_SKBEDIT_MARK] != NULL) {
170                 flags |= SKBEDIT_F_MARK;
171                 mark = nla_data(tb[TCA_SKBEDIT_MARK]);
172         }
173
174         if (tb[TCA_SKBEDIT_MASK] != NULL) {
175                 flags |= SKBEDIT_F_MASK;
176                 mask = nla_data(tb[TCA_SKBEDIT_MASK]);
177         }
178
179         if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
180                 u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
181
182                 if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) {
183                         u16 *queue_mapping_max;
184
185                         if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] ||
186                             !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) {
187                                 NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping.");
188                                 return -EINVAL;
189                         }
190
191                         queue_mapping_max =
192                                 nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
193                         if (*queue_mapping_max < *queue_mapping) {
194                                 NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min.");
195                                 return -EINVAL;
196                         }
197
198                         mapping_mod = *queue_mapping_max - *queue_mapping + 1;
199                         flags |= SKBEDIT_F_TXQ_SKBHASH;
200                 }
201                 if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
202                         flags |= SKBEDIT_F_INHERITDSFIELD;
203         }
204
205         parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
206         index = parm->index;
207         err = tcf_idr_check_alloc(tn, &index, a, bind);
208         if (err < 0)
209                 return err;
210         exists = err;
211         if (exists && bind)
212                 return 0;
213
214         if (!flags) {
215                 if (exists)
216                         tcf_idr_release(*a, bind);
217                 else
218                         tcf_idr_cleanup(tn, index);
219                 return -EINVAL;
220         }
221
222         if (!exists) {
223                 ret = tcf_idr_create(tn, index, est, a,
224                                      &act_skbedit_ops, bind, true, act_flags);
225                 if (ret) {
226                         tcf_idr_cleanup(tn, index);
227                         return ret;
228                 }
229
230                 d = to_skbedit(*a);
231                 ret = ACT_P_CREATED;
232         } else {
233                 d = to_skbedit(*a);
234                 if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
235                         tcf_idr_release(*a, bind);
236                         return -EEXIST;
237                 }
238         }
239         err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
240         if (err < 0)
241                 goto release_idr;
242
243         params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
244         if (unlikely(!params_new)) {
245                 err = -ENOMEM;
246                 goto put_chain;
247         }
248
249         params_new->flags = flags;
250         if (flags & SKBEDIT_F_PRIORITY)
251                 params_new->priority = *priority;
252         if (flags & SKBEDIT_F_QUEUE_MAPPING) {
253                 params_new->queue_mapping = *queue_mapping;
254                 params_new->mapping_mod = mapping_mod;
255         }
256         if (flags & SKBEDIT_F_MARK)
257                 params_new->mark = *mark;
258         if (flags & SKBEDIT_F_PTYPE)
259                 params_new->ptype = *ptype;
260         /* default behaviour is to use all the bits */
261         params_new->mask = 0xffffffff;
262         if (flags & SKBEDIT_F_MASK)
263                 params_new->mask = *mask;
264
265         spin_lock_bh(&d->tcf_lock);
266         goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
267         params_new = rcu_replace_pointer(d->params, params_new,
268                                          lockdep_is_held(&d->tcf_lock));
269         spin_unlock_bh(&d->tcf_lock);
270         if (params_new)
271                 kfree_rcu(params_new, rcu);
272         if (goto_ch)
273                 tcf_chain_put_by_act(goto_ch);
274
275         return ret;
276 put_chain:
277         if (goto_ch)
278                 tcf_chain_put_by_act(goto_ch);
279 release_idr:
280         tcf_idr_release(*a, bind);
281         return err;
282 }
283
284 static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
285                             int bind, int ref)
286 {
287         unsigned char *b = skb_tail_pointer(skb);
288         struct tcf_skbedit *d = to_skbedit(a);
289         struct tcf_skbedit_params *params;
290         struct tc_skbedit opt = {
291                 .index   = d->tcf_index,
292                 .refcnt  = refcount_read(&d->tcf_refcnt) - ref,
293                 .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
294         };
295         u64 pure_flags = 0;
296         struct tcf_t t;
297
298         spin_lock_bh(&d->tcf_lock);
299         params = rcu_dereference_protected(d->params,
300                                            lockdep_is_held(&d->tcf_lock));
301         opt.action = d->tcf_action;
302
303         if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
304                 goto nla_put_failure;
305         if ((params->flags & SKBEDIT_F_PRIORITY) &&
306             nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority))
307                 goto nla_put_failure;
308         if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) &&
309             nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping))
310                 goto nla_put_failure;
311         if ((params->flags & SKBEDIT_F_MARK) &&
312             nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark))
313                 goto nla_put_failure;
314         if ((params->flags & SKBEDIT_F_PTYPE) &&
315             nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype))
316                 goto nla_put_failure;
317         if ((params->flags & SKBEDIT_F_MASK) &&
318             nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask))
319                 goto nla_put_failure;
320         if (params->flags & SKBEDIT_F_INHERITDSFIELD)
321                 pure_flags |= SKBEDIT_F_INHERITDSFIELD;
322         if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
323                 if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
324                                 params->queue_mapping + params->mapping_mod - 1))
325                         goto nla_put_failure;
326
327                 pure_flags |= SKBEDIT_F_TXQ_SKBHASH;
328         }
329         if (pure_flags != 0 &&
330             nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
331                 goto nla_put_failure;
332
333         tcf_tm_dump(&t, &d->tcf_tm);
334         if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
335                 goto nla_put_failure;
336         spin_unlock_bh(&d->tcf_lock);
337
338         return skb->len;
339
340 nla_put_failure:
341         spin_unlock_bh(&d->tcf_lock);
342         nlmsg_trim(skb, b);
343         return -1;
344 }
345
346 static void tcf_skbedit_cleanup(struct tc_action *a)
347 {
348         struct tcf_skbedit *d = to_skbedit(a);
349         struct tcf_skbedit_params *params;
350
351         params = rcu_dereference_protected(d->params, 1);
352         if (params)
353                 kfree_rcu(params, rcu);
354 }
355
356 static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
357 {
358         return nla_total_size(sizeof(struct tc_skbedit))
359                 + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
360                 + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
361                 + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */
362                 + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
363                 + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
364                 + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
365                 + nla_total_size_64bit(sizeof(u64)); /* TCA_SKBEDIT_FLAGS */
366 }
367
368 static int tcf_skbedit_offload_act_setup(struct tc_action *act, void *entry_data,
369                                          u32 *index_inc, bool bind,
370                                          struct netlink_ext_ack *extack)
371 {
372         if (bind) {
373                 struct flow_action_entry *entry = entry_data;
374
375                 if (is_tcf_skbedit_mark(act)) {
376                         entry->id = FLOW_ACTION_MARK;
377                         entry->mark = tcf_skbedit_mark(act);
378                 } else if (is_tcf_skbedit_ptype(act)) {
379                         entry->id = FLOW_ACTION_PTYPE;
380                         entry->ptype = tcf_skbedit_ptype(act);
381                 } else if (is_tcf_skbedit_priority(act)) {
382                         entry->id = FLOW_ACTION_PRIORITY;
383                         entry->priority = tcf_skbedit_priority(act);
384                 } else if (is_tcf_skbedit_tx_queue_mapping(act)) {
385                         NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"queue_mapping\" option is used on transmit side");
386                         return -EOPNOTSUPP;
387                 } else if (is_tcf_skbedit_rx_queue_mapping(act)) {
388                         entry->id = FLOW_ACTION_RX_QUEUE_MAPPING;
389                         entry->rx_queue = tcf_skbedit_rx_queue_mapping(act);
390                 } else if (is_tcf_skbedit_inheritdsfield(act)) {
391                         NL_SET_ERR_MSG_MOD(extack, "Offload not supported when \"inheritdsfield\" option is used");
392                         return -EOPNOTSUPP;
393                 } else {
394                         NL_SET_ERR_MSG_MOD(extack, "Unsupported skbedit option offload");
395                         return -EOPNOTSUPP;
396                 }
397                 *index_inc = 1;
398         } else {
399                 struct flow_offload_action *fl_action = entry_data;
400
401                 if (is_tcf_skbedit_mark(act))
402                         fl_action->id = FLOW_ACTION_MARK;
403                 else if (is_tcf_skbedit_ptype(act))
404                         fl_action->id = FLOW_ACTION_PTYPE;
405                 else if (is_tcf_skbedit_priority(act))
406                         fl_action->id = FLOW_ACTION_PRIORITY;
407                 else if (is_tcf_skbedit_rx_queue_mapping(act))
408                         fl_action->id = FLOW_ACTION_RX_QUEUE_MAPPING;
409                 else
410                         return -EOPNOTSUPP;
411         }
412
413         return 0;
414 }
415
416 static struct tc_action_ops act_skbedit_ops = {
417         .kind           =       "skbedit",
418         .id             =       TCA_ID_SKBEDIT,
419         .owner          =       THIS_MODULE,
420         .act            =       tcf_skbedit_act,
421         .stats_update   =       tcf_skbedit_stats_update,
422         .dump           =       tcf_skbedit_dump,
423         .init           =       tcf_skbedit_init,
424         .cleanup        =       tcf_skbedit_cleanup,
425         .get_fill_size  =       tcf_skbedit_get_fill_size,
426         .offload_act_setup =    tcf_skbedit_offload_act_setup,
427         .size           =       sizeof(struct tcf_skbedit),
428 };
429
430 static __net_init int skbedit_init_net(struct net *net)
431 {
432         struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id);
433
434         return tc_action_net_init(net, tn, &act_skbedit_ops);
435 }
436
437 static void __net_exit skbedit_exit_net(struct list_head *net_list)
438 {
439         tc_action_net_exit(net_list, act_skbedit_ops.net_id);
440 }
441
442 static struct pernet_operations skbedit_net_ops = {
443         .init = skbedit_init_net,
444         .exit_batch = skbedit_exit_net,
445         .id   = &act_skbedit_ops.net_id,
446         .size = sizeof(struct tc_action_net),
447 };
448
449 MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
450 MODULE_DESCRIPTION("SKB Editing");
451 MODULE_LICENSE("GPL");
452
453 static int __init skbedit_init_module(void)
454 {
455         return tcf_register_action(&act_skbedit_ops, &skbedit_net_ops);
456 }
457
458 static void __exit skbedit_cleanup_module(void)
459 {
460         tcf_unregister_action(&act_skbedit_ops, &skbedit_net_ops);
461 }
462
463 module_init(skbedit_init_module);
464 module_exit(skbedit_cleanup_module);