flow_dissector: add meaningful comments
[linux-2.6-block.git] / net / core / flow_dissector.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/skbuff.h>
4 #include <linux/export.h>
5 #include <linux/ip.h>
6 #include <linux/ipv6.h>
7 #include <linux/if_vlan.h>
8 #include <net/dsa.h>
9 #include <net/dst_metadata.h>
10 #include <net/ip.h>
11 #include <net/ipv6.h>
12 #include <net/gre.h>
13 #include <net/pptp.h>
14 #include <net/tipc.h>
15 #include <linux/igmp.h>
16 #include <linux/icmp.h>
17 #include <linux/sctp.h>
18 #include <linux/dccp.h>
19 #include <linux/if_tunnel.h>
20 #include <linux/if_pppox.h>
21 #include <linux/ppp_defs.h>
22 #include <linux/stddef.h>
23 #include <linux/if_ether.h>
24 #include <linux/mpls.h>
25 #include <linux/tcp.h>
26 #include <net/flow_dissector.h>
27 #include <scsi/fc/fc_fcoe.h>
28 #include <uapi/linux/batadv_packet.h>
29 #include <linux/bpf.h>
30 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
31 #include <net/netfilter/nf_conntrack_core.h>
32 #include <net/netfilter/nf_conntrack_labels.h>
33 #endif
34
35 static DEFINE_MUTEX(flow_dissector_mutex);
36
37 static void dissector_set_key(struct flow_dissector *flow_dissector,
38                               enum flow_dissector_key_id key_id)
39 {
40         flow_dissector->used_keys |= (1 << key_id);
41 }
42
43 void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
44                              const struct flow_dissector_key *key,
45                              unsigned int key_count)
46 {
47         unsigned int i;
48
49         memset(flow_dissector, 0, sizeof(*flow_dissector));
50
51         for (i = 0; i < key_count; i++, key++) {
52                 /* User should make sure that every key target offset is withing
53                  * boundaries of unsigned short.
54                  */
55                 BUG_ON(key->offset > USHRT_MAX);
56                 BUG_ON(dissector_uses_key(flow_dissector,
57                                           key->key_id));
58
59                 dissector_set_key(flow_dissector, key->key_id);
60                 flow_dissector->offset[key->key_id] = key->offset;
61         }
62
63         /* Ensure that the dissector always includes control and basic key.
64          * That way we are able to avoid handling lack of these in fast path.
65          */
66         BUG_ON(!dissector_uses_key(flow_dissector,
67                                    FLOW_DISSECTOR_KEY_CONTROL));
68         BUG_ON(!dissector_uses_key(flow_dissector,
69                                    FLOW_DISSECTOR_KEY_BASIC));
70 }
71 EXPORT_SYMBOL(skb_flow_dissector_init);
72
73 int skb_flow_dissector_prog_query(const union bpf_attr *attr,
74                                   union bpf_attr __user *uattr)
75 {
76         __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
77         u32 prog_id, prog_cnt = 0, flags = 0;
78         struct bpf_prog *attached;
79         struct net *net;
80
81         if (attr->query.query_flags)
82                 return -EINVAL;
83
84         net = get_net_ns_by_fd(attr->query.target_fd);
85         if (IS_ERR(net))
86                 return PTR_ERR(net);
87
88         rcu_read_lock();
89         attached = rcu_dereference(net->flow_dissector_prog);
90         if (attached) {
91                 prog_cnt = 1;
92                 prog_id = attached->aux->id;
93         }
94         rcu_read_unlock();
95
96         put_net(net);
97
98         if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
99                 return -EFAULT;
100         if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
101                 return -EFAULT;
102
103         if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
104                 return 0;
105
106         if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
107                 return -EFAULT;
108
109         return 0;
110 }
111
112 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
113                                        struct bpf_prog *prog)
114 {
115         struct bpf_prog *attached;
116         struct net *net;
117         int ret = 0;
118
119         net = current->nsproxy->net_ns;
120         mutex_lock(&flow_dissector_mutex);
121
122         if (net == &init_net) {
123                 /* BPF flow dissector in the root namespace overrides
124                  * any per-net-namespace one. When attaching to root,
125                  * make sure we don't have any BPF program attached
126                  * to the non-root namespaces.
127                  */
128                 struct net *ns;
129
130                 for_each_net(ns) {
131                         if (ns == &init_net)
132                                 continue;
133                         if (rcu_access_pointer(ns->flow_dissector_prog)) {
134                                 ret = -EEXIST;
135                                 goto out;
136                         }
137                 }
138         } else {
139                 /* Make sure root flow dissector is not attached
140                  * when attaching to the non-root namespace.
141                  */
142                 if (rcu_access_pointer(init_net.flow_dissector_prog)) {
143                         ret = -EEXIST;
144                         goto out;
145                 }
146         }
147
148         attached = rcu_dereference_protected(net->flow_dissector_prog,
149                                              lockdep_is_held(&flow_dissector_mutex));
150         if (attached == prog) {
151                 /* The same program cannot be attached twice */
152                 ret = -EINVAL;
153                 goto out;
154         }
155         rcu_assign_pointer(net->flow_dissector_prog, prog);
156         if (attached)
157                 bpf_prog_put(attached);
158 out:
159         mutex_unlock(&flow_dissector_mutex);
160         return ret;
161 }
162
163 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
164 {
165         struct bpf_prog *attached;
166         struct net *net;
167
168         net = current->nsproxy->net_ns;
169         mutex_lock(&flow_dissector_mutex);
170         attached = rcu_dereference_protected(net->flow_dissector_prog,
171                                              lockdep_is_held(&flow_dissector_mutex));
172         if (!attached) {
173                 mutex_unlock(&flow_dissector_mutex);
174                 return -ENOENT;
175         }
176         RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
177         bpf_prog_put(attached);
178         mutex_unlock(&flow_dissector_mutex);
179         return 0;
180 }
181 /**
182  * skb_flow_get_be16 - extract be16 entity
183  * @skb: sk_buff to extract from
184  * @poff: offset to extract at
185  * @data: raw buffer pointer to the packet
186  * @hlen: packet header length
187  *
188  * The function will try to retrieve a be32 entity at
189  * offset poff
190  */
191 static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff,
192                                 void *data, int hlen)
193 {
194         __be16 *u, _u;
195
196         u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
197         if (u)
198                 return *u;
199
200         return 0;
201 }
202
203 /**
204  * __skb_flow_get_ports - extract the upper layer ports and return them
205  * @skb: sk_buff to extract the ports from
206  * @thoff: transport header offset
207  * @ip_proto: protocol for which to get port offset
208  * @data: raw buffer pointer to the packet, if NULL use skb->data
209  * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
210  *
211  * The function will try to retrieve the ports at offset thoff + poff where poff
212  * is the protocol port offset returned from proto_ports_offset
213  */
214 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
215                             void *data, int hlen)
216 {
217         int poff = proto_ports_offset(ip_proto);
218
219         if (!data) {
220                 data = skb->data;
221                 hlen = skb_headlen(skb);
222         }
223
224         if (poff >= 0) {
225                 __be32 *ports, _ports;
226
227                 ports = __skb_header_pointer(skb, thoff + poff,
228                                              sizeof(_ports), data, hlen, &_ports);
229                 if (ports)
230                         return *ports;
231         }
232
233         return 0;
234 }
235 EXPORT_SYMBOL(__skb_flow_get_ports);
236
237 void skb_flow_dissect_meta(const struct sk_buff *skb,
238                            struct flow_dissector *flow_dissector,
239                            void *target_container)
240 {
241         struct flow_dissector_key_meta *meta;
242
243         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_META))
244                 return;
245
246         meta = skb_flow_dissector_target(flow_dissector,
247                                          FLOW_DISSECTOR_KEY_META,
248                                          target_container);
249         meta->ingress_ifindex = skb->skb_iif;
250 }
251 EXPORT_SYMBOL(skb_flow_dissect_meta);
252
253 static void
254 skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
255                                    struct flow_dissector *flow_dissector,
256                                    void *target_container)
257 {
258         struct flow_dissector_key_control *ctrl;
259
260         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
261                 return;
262
263         ctrl = skb_flow_dissector_target(flow_dissector,
264                                          FLOW_DISSECTOR_KEY_ENC_CONTROL,
265                                          target_container);
266         ctrl->addr_type = type;
267 }
268
269 void
270 skb_flow_dissect_ct(const struct sk_buff *skb,
271                     struct flow_dissector *flow_dissector,
272                     void *target_container,
273                     u16 *ctinfo_map,
274                     size_t mapsize)
275 {
276 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
277         struct flow_dissector_key_ct *key;
278         enum ip_conntrack_info ctinfo;
279         struct nf_conn_labels *cl;
280         struct nf_conn *ct;
281
282         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CT))
283                 return;
284
285         ct = nf_ct_get(skb, &ctinfo);
286         if (!ct)
287                 return;
288
289         key = skb_flow_dissector_target(flow_dissector,
290                                         FLOW_DISSECTOR_KEY_CT,
291                                         target_container);
292
293         if (ctinfo < mapsize)
294                 key->ct_state = ctinfo_map[ctinfo];
295 #if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)
296         key->ct_zone = ct->zone.id;
297 #endif
298 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
299         key->ct_mark = ct->mark;
300 #endif
301
302         cl = nf_ct_labels_find(ct);
303         if (cl)
304                 memcpy(key->ct_labels, cl->bits, sizeof(key->ct_labels));
305 #endif /* CONFIG_NF_CONNTRACK */
306 }
307 EXPORT_SYMBOL(skb_flow_dissect_ct);
308
309 void
310 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
311                              struct flow_dissector *flow_dissector,
312                              void *target_container)
313 {
314         struct ip_tunnel_info *info;
315         struct ip_tunnel_key *key;
316
317         /* A quick check to see if there might be something to do. */
318         if (!dissector_uses_key(flow_dissector,
319                                 FLOW_DISSECTOR_KEY_ENC_KEYID) &&
320             !dissector_uses_key(flow_dissector,
321                                 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
322             !dissector_uses_key(flow_dissector,
323                                 FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
324             !dissector_uses_key(flow_dissector,
325                                 FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
326             !dissector_uses_key(flow_dissector,
327                                 FLOW_DISSECTOR_KEY_ENC_PORTS) &&
328             !dissector_uses_key(flow_dissector,
329                                 FLOW_DISSECTOR_KEY_ENC_IP) &&
330             !dissector_uses_key(flow_dissector,
331                                 FLOW_DISSECTOR_KEY_ENC_OPTS))
332                 return;
333
334         info = skb_tunnel_info(skb);
335         if (!info)
336                 return;
337
338         key = &info->key;
339
340         switch (ip_tunnel_info_af(info)) {
341         case AF_INET:
342                 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
343                                                    flow_dissector,
344                                                    target_container);
345                 if (dissector_uses_key(flow_dissector,
346                                        FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
347                         struct flow_dissector_key_ipv4_addrs *ipv4;
348
349                         ipv4 = skb_flow_dissector_target(flow_dissector,
350                                                          FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
351                                                          target_container);
352                         ipv4->src = key->u.ipv4.src;
353                         ipv4->dst = key->u.ipv4.dst;
354                 }
355                 break;
356         case AF_INET6:
357                 skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
358                                                    flow_dissector,
359                                                    target_container);
360                 if (dissector_uses_key(flow_dissector,
361                                        FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
362                         struct flow_dissector_key_ipv6_addrs *ipv6;
363
364                         ipv6 = skb_flow_dissector_target(flow_dissector,
365                                                          FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
366                                                          target_container);
367                         ipv6->src = key->u.ipv6.src;
368                         ipv6->dst = key->u.ipv6.dst;
369                 }
370                 break;
371         }
372
373         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
374                 struct flow_dissector_key_keyid *keyid;
375
376                 keyid = skb_flow_dissector_target(flow_dissector,
377                                                   FLOW_DISSECTOR_KEY_ENC_KEYID,
378                                                   target_container);
379                 keyid->keyid = tunnel_id_to_key32(key->tun_id);
380         }
381
382         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
383                 struct flow_dissector_key_ports *tp;
384
385                 tp = skb_flow_dissector_target(flow_dissector,
386                                                FLOW_DISSECTOR_KEY_ENC_PORTS,
387                                                target_container);
388                 tp->src = key->tp_src;
389                 tp->dst = key->tp_dst;
390         }
391
392         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
393                 struct flow_dissector_key_ip *ip;
394
395                 ip = skb_flow_dissector_target(flow_dissector,
396                                                FLOW_DISSECTOR_KEY_ENC_IP,
397                                                target_container);
398                 ip->tos = key->tos;
399                 ip->ttl = key->ttl;
400         }
401
402         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
403                 struct flow_dissector_key_enc_opts *enc_opt;
404
405                 enc_opt = skb_flow_dissector_target(flow_dissector,
406                                                     FLOW_DISSECTOR_KEY_ENC_OPTS,
407                                                     target_container);
408
409                 if (info->options_len) {
410                         enc_opt->len = info->options_len;
411                         ip_tunnel_info_opts_get(enc_opt->data, info);
412                         enc_opt->dst_opt_type = info->key.tun_flags &
413                                                 TUNNEL_OPTIONS_PRESENT;
414                 }
415         }
416 }
417 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
418
419 static enum flow_dissect_ret
420 __skb_flow_dissect_mpls(const struct sk_buff *skb,
421                         struct flow_dissector *flow_dissector,
422                         void *target_container, void *data, int nhoff, int hlen)
423 {
424         struct flow_dissector_key_keyid *key_keyid;
425         struct mpls_label *hdr, _hdr[2];
426         u32 entry, label;
427
428         if (!dissector_uses_key(flow_dissector,
429                                 FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
430             !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
431                 return FLOW_DISSECT_RET_OUT_GOOD;
432
433         hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
434                                    hlen, &_hdr);
435         if (!hdr)
436                 return FLOW_DISSECT_RET_OUT_BAD;
437
438         entry = ntohl(hdr[0].entry);
439         label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
440
441         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
442                 struct flow_dissector_key_mpls *key_mpls;
443
444                 key_mpls = skb_flow_dissector_target(flow_dissector,
445                                                      FLOW_DISSECTOR_KEY_MPLS,
446                                                      target_container);
447                 key_mpls->mpls_label = label;
448                 key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
449                                         >> MPLS_LS_TTL_SHIFT;
450                 key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
451                                         >> MPLS_LS_TC_SHIFT;
452                 key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
453                                         >> MPLS_LS_S_SHIFT;
454         }
455
456         if (label == MPLS_LABEL_ENTROPY) {
457                 key_keyid = skb_flow_dissector_target(flow_dissector,
458                                                       FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
459                                                       target_container);
460                 key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
461         }
462         return FLOW_DISSECT_RET_OUT_GOOD;
463 }
464
465 static enum flow_dissect_ret
466 __skb_flow_dissect_arp(const struct sk_buff *skb,
467                        struct flow_dissector *flow_dissector,
468                        void *target_container, void *data, int nhoff, int hlen)
469 {
470         struct flow_dissector_key_arp *key_arp;
471         struct {
472                 unsigned char ar_sha[ETH_ALEN];
473                 unsigned char ar_sip[4];
474                 unsigned char ar_tha[ETH_ALEN];
475                 unsigned char ar_tip[4];
476         } *arp_eth, _arp_eth;
477         const struct arphdr *arp;
478         struct arphdr _arp;
479
480         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
481                 return FLOW_DISSECT_RET_OUT_GOOD;
482
483         arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
484                                    hlen, &_arp);
485         if (!arp)
486                 return FLOW_DISSECT_RET_OUT_BAD;
487
488         if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
489             arp->ar_pro != htons(ETH_P_IP) ||
490             arp->ar_hln != ETH_ALEN ||
491             arp->ar_pln != 4 ||
492             (arp->ar_op != htons(ARPOP_REPLY) &&
493              arp->ar_op != htons(ARPOP_REQUEST)))
494                 return FLOW_DISSECT_RET_OUT_BAD;
495
496         arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
497                                        sizeof(_arp_eth), data,
498                                        hlen, &_arp_eth);
499         if (!arp_eth)
500                 return FLOW_DISSECT_RET_OUT_BAD;
501
502         key_arp = skb_flow_dissector_target(flow_dissector,
503                                             FLOW_DISSECTOR_KEY_ARP,
504                                             target_container);
505
506         memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
507         memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
508
509         /* Only store the lower byte of the opcode;
510          * this covers ARPOP_REPLY and ARPOP_REQUEST.
511          */
512         key_arp->op = ntohs(arp->ar_op) & 0xff;
513
514         ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
515         ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
516
517         return FLOW_DISSECT_RET_OUT_GOOD;
518 }
519
520 static enum flow_dissect_ret
521 __skb_flow_dissect_gre(const struct sk_buff *skb,
522                        struct flow_dissector_key_control *key_control,
523                        struct flow_dissector *flow_dissector,
524                        void *target_container, void *data,
525                        __be16 *p_proto, int *p_nhoff, int *p_hlen,
526                        unsigned int flags)
527 {
528         struct flow_dissector_key_keyid *key_keyid;
529         struct gre_base_hdr *hdr, _hdr;
530         int offset = 0;
531         u16 gre_ver;
532
533         hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
534                                    data, *p_hlen, &_hdr);
535         if (!hdr)
536                 return FLOW_DISSECT_RET_OUT_BAD;
537
538         /* Only look inside GRE without routing */
539         if (hdr->flags & GRE_ROUTING)
540                 return FLOW_DISSECT_RET_OUT_GOOD;
541
542         /* Only look inside GRE for version 0 and 1 */
543         gre_ver = ntohs(hdr->flags & GRE_VERSION);
544         if (gre_ver > 1)
545                 return FLOW_DISSECT_RET_OUT_GOOD;
546
547         *p_proto = hdr->protocol;
548         if (gre_ver) {
549                 /* Version1 must be PPTP, and check the flags */
550                 if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
551                         return FLOW_DISSECT_RET_OUT_GOOD;
552         }
553
554         offset += sizeof(struct gre_base_hdr);
555
556         if (hdr->flags & GRE_CSUM)
557                 offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
558                           FIELD_SIZEOF(struct gre_full_hdr, reserved1);
559
560         if (hdr->flags & GRE_KEY) {
561                 const __be32 *keyid;
562                 __be32 _keyid;
563
564                 keyid = __skb_header_pointer(skb, *p_nhoff + offset,
565                                              sizeof(_keyid),
566                                              data, *p_hlen, &_keyid);
567                 if (!keyid)
568                         return FLOW_DISSECT_RET_OUT_BAD;
569
570                 if (dissector_uses_key(flow_dissector,
571                                        FLOW_DISSECTOR_KEY_GRE_KEYID)) {
572                         key_keyid = skb_flow_dissector_target(flow_dissector,
573                                                               FLOW_DISSECTOR_KEY_GRE_KEYID,
574                                                               target_container);
575                         if (gre_ver == 0)
576                                 key_keyid->keyid = *keyid;
577                         else
578                                 key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
579                 }
580                 offset += FIELD_SIZEOF(struct gre_full_hdr, key);
581         }
582
583         if (hdr->flags & GRE_SEQ)
584                 offset += FIELD_SIZEOF(struct pptp_gre_header, seq);
585
586         if (gre_ver == 0) {
587                 if (*p_proto == htons(ETH_P_TEB)) {
588                         const struct ethhdr *eth;
589                         struct ethhdr _eth;
590
591                         eth = __skb_header_pointer(skb, *p_nhoff + offset,
592                                                    sizeof(_eth),
593                                                    data, *p_hlen, &_eth);
594                         if (!eth)
595                                 return FLOW_DISSECT_RET_OUT_BAD;
596                         *p_proto = eth->h_proto;
597                         offset += sizeof(*eth);
598
599                         /* Cap headers that we access via pointers at the
600                          * end of the Ethernet header as our maximum alignment
601                          * at that point is only 2 bytes.
602                          */
603                         if (NET_IP_ALIGN)
604                                 *p_hlen = *p_nhoff + offset;
605                 }
606         } else { /* version 1, must be PPTP */
607                 u8 _ppp_hdr[PPP_HDRLEN];
608                 u8 *ppp_hdr;
609
610                 if (hdr->flags & GRE_ACK)
611                         offset += FIELD_SIZEOF(struct pptp_gre_header, ack);
612
613                 ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
614                                                sizeof(_ppp_hdr),
615                                                data, *p_hlen, _ppp_hdr);
616                 if (!ppp_hdr)
617                         return FLOW_DISSECT_RET_OUT_BAD;
618
619                 switch (PPP_PROTOCOL(ppp_hdr)) {
620                 case PPP_IP:
621                         *p_proto = htons(ETH_P_IP);
622                         break;
623                 case PPP_IPV6:
624                         *p_proto = htons(ETH_P_IPV6);
625                         break;
626                 default:
627                         /* Could probably catch some more like MPLS */
628                         break;
629                 }
630
631                 offset += PPP_HDRLEN;
632         }
633
634         *p_nhoff += offset;
635         key_control->flags |= FLOW_DIS_ENCAPSULATION;
636         if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
637                 return FLOW_DISSECT_RET_OUT_GOOD;
638
639         return FLOW_DISSECT_RET_PROTO_AGAIN;
640 }
641
642 /**
643  * __skb_flow_dissect_batadv() - dissect batman-adv header
644  * @skb: sk_buff to with the batman-adv header
645  * @key_control: flow dissectors control key
646  * @data: raw buffer pointer to the packet, if NULL use skb->data
647  * @p_proto: pointer used to update the protocol to process next
648  * @p_nhoff: pointer used to update inner network header offset
649  * @hlen: packet header length
650  * @flags: any combination of FLOW_DISSECTOR_F_*
651  *
652  * ETH_P_BATMAN packets are tried to be dissected. Only
653  * &struct batadv_unicast packets are actually processed because they contain an
654  * inner ethernet header and are usually followed by actual network header. This
655  * allows the flow dissector to continue processing the packet.
656  *
657  * Return: FLOW_DISSECT_RET_PROTO_AGAIN when &struct batadv_unicast was found,
658  *  FLOW_DISSECT_RET_OUT_GOOD when dissector should stop after encapsulation,
659  *  otherwise FLOW_DISSECT_RET_OUT_BAD
660  */
661 static enum flow_dissect_ret
662 __skb_flow_dissect_batadv(const struct sk_buff *skb,
663                           struct flow_dissector_key_control *key_control,
664                           void *data, __be16 *p_proto, int *p_nhoff, int hlen,
665                           unsigned int flags)
666 {
667         struct {
668                 struct batadv_unicast_packet batadv_unicast;
669                 struct ethhdr eth;
670         } *hdr, _hdr;
671
672         hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr), data, hlen,
673                                    &_hdr);
674         if (!hdr)
675                 return FLOW_DISSECT_RET_OUT_BAD;
676
677         if (hdr->batadv_unicast.version != BATADV_COMPAT_VERSION)
678                 return FLOW_DISSECT_RET_OUT_BAD;
679
680         if (hdr->batadv_unicast.packet_type != BATADV_UNICAST)
681                 return FLOW_DISSECT_RET_OUT_BAD;
682
683         *p_proto = hdr->eth.h_proto;
684         *p_nhoff += sizeof(*hdr);
685
686         key_control->flags |= FLOW_DIS_ENCAPSULATION;
687         if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
688                 return FLOW_DISSECT_RET_OUT_GOOD;
689
690         return FLOW_DISSECT_RET_PROTO_AGAIN;
691 }
692
693 static void
694 __skb_flow_dissect_tcp(const struct sk_buff *skb,
695                        struct flow_dissector *flow_dissector,
696                        void *target_container, void *data, int thoff, int hlen)
697 {
698         struct flow_dissector_key_tcp *key_tcp;
699         struct tcphdr *th, _th;
700
701         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_TCP))
702                 return;
703
704         th = __skb_header_pointer(skb, thoff, sizeof(_th), data, hlen, &_th);
705         if (!th)
706                 return;
707
708         if (unlikely(__tcp_hdrlen(th) < sizeof(_th)))
709                 return;
710
711         key_tcp = skb_flow_dissector_target(flow_dissector,
712                                             FLOW_DISSECTOR_KEY_TCP,
713                                             target_container);
714         key_tcp->flags = (*(__be16 *) &tcp_flag_word(th) & htons(0x0FFF));
715 }
716
717 static void
718 __skb_flow_dissect_ipv4(const struct sk_buff *skb,
719                         struct flow_dissector *flow_dissector,
720                         void *target_container, void *data, const struct iphdr *iph)
721 {
722         struct flow_dissector_key_ip *key_ip;
723
724         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
725                 return;
726
727         key_ip = skb_flow_dissector_target(flow_dissector,
728                                            FLOW_DISSECTOR_KEY_IP,
729                                            target_container);
730         key_ip->tos = iph->tos;
731         key_ip->ttl = iph->ttl;
732 }
733
734 static void
735 __skb_flow_dissect_ipv6(const struct sk_buff *skb,
736                         struct flow_dissector *flow_dissector,
737                         void *target_container, void *data, const struct ipv6hdr *iph)
738 {
739         struct flow_dissector_key_ip *key_ip;
740
741         if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IP))
742                 return;
743
744         key_ip = skb_flow_dissector_target(flow_dissector,
745                                            FLOW_DISSECTOR_KEY_IP,
746                                            target_container);
747         key_ip->tos = ipv6_get_dsfield(iph);
748         key_ip->ttl = iph->hop_limit;
749 }
750
/* Upper bound on the number of protocol headers that __skb_flow_dissect
 * is allowed to parse.
 */
#define MAX_FLOW_DISSECT_HDRS	15

/* Count one more header in *@num_hdrs and report whether the total is
 * still within MAX_FLOW_DISSECT_HDRS.
 */
static bool skb_flow_dissect_allowed(int *num_hdrs)
{
	int seen = *num_hdrs + 1;

	*num_hdrs = seen;

	return seen <= MAX_FLOW_DISSECT_HDRS;
}
762
763 static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
764                                      struct flow_dissector *flow_dissector,
765                                      void *target_container)
766 {
767         struct flow_dissector_key_control *key_control;
768         struct flow_dissector_key_basic *key_basic;
769         struct flow_dissector_key_addrs *key_addrs;
770         struct flow_dissector_key_ports *key_ports;
771         struct flow_dissector_key_tags *key_tags;
772
773         key_control = skb_flow_dissector_target(flow_dissector,
774                                                 FLOW_DISSECTOR_KEY_CONTROL,
775                                                 target_container);
776         key_control->thoff = flow_keys->thoff;
777         if (flow_keys->is_frag)
778                 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
779         if (flow_keys->is_first_frag)
780                 key_control->flags |= FLOW_DIS_FIRST_FRAG;
781         if (flow_keys->is_encap)
782                 key_control->flags |= FLOW_DIS_ENCAPSULATION;
783
784         key_basic = skb_flow_dissector_target(flow_dissector,
785                                               FLOW_DISSECTOR_KEY_BASIC,
786                                               target_container);
787         key_basic->n_proto = flow_keys->n_proto;
788         key_basic->ip_proto = flow_keys->ip_proto;
789
790         if (flow_keys->addr_proto == ETH_P_IP &&
791             dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
792                 key_addrs = skb_flow_dissector_target(flow_dissector,
793                                                       FLOW_DISSECTOR_KEY_IPV4_ADDRS,
794                                                       target_container);
795                 key_addrs->v4addrs.src = flow_keys->ipv4_src;
796                 key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
797                 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
798         } else if (flow_keys->addr_proto == ETH_P_IPV6 &&
799                    dissector_uses_key(flow_dissector,
800                                       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
801                 key_addrs = skb_flow_dissector_target(flow_dissector,
802                                                       FLOW_DISSECTOR_KEY_IPV6_ADDRS,
803                                                       target_container);
804                 memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
805                        sizeof(key_addrs->v6addrs));
806                 key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
807         }
808
809         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
810                 key_ports = skb_flow_dissector_target(flow_dissector,
811                                                       FLOW_DISSECTOR_KEY_PORTS,
812                                                       target_container);
813                 key_ports->src = flow_keys->sport;
814                 key_ports->dst = flow_keys->dport;
815         }
816
817         if (dissector_uses_key(flow_dissector,
818                                FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
819                 key_tags = skb_flow_dissector_target(flow_dissector,
820                                                      FLOW_DISSECTOR_KEY_FLOW_LABEL,
821                                                      target_container);
822                 key_tags->flow_label = ntohl(flow_keys->flow_label);
823         }
824 }
825
826 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
827                       __be16 proto, int nhoff, int hlen, unsigned int flags)
828 {
829         struct bpf_flow_keys *flow_keys = ctx->flow_keys;
830         u32 result;
831
832         /* Pass parameters to the BPF program */
833         memset(flow_keys, 0, sizeof(*flow_keys));
834         flow_keys->n_proto = proto;
835         flow_keys->nhoff = nhoff;
836         flow_keys->thoff = flow_keys->nhoff;
837
838         BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
839                      (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
840         BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
841                      (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
842         BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
843                      (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
844         flow_keys->flags = flags;
845
846         preempt_disable();
847         result = BPF_PROG_RUN(prog, ctx);
848         preempt_enable();
849
850         flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
851         flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
852                                    flow_keys->nhoff, hlen);
853
854         return result == BPF_OK;
855 }
856
857 /**
858  * __skb_flow_dissect - extract the flow_keys struct and return it
859  * @net: associated network namespace, derived from @skb if NULL
860  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
861  * @flow_dissector: list of keys to dissect
862  * @target_container: target structure to put dissected values into
863  * @data: raw buffer pointer to the packet, if NULL use skb->data
864  * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
865  * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
866  * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
867  * @flags: flags that control the dissection process, e.g.
868  *         FLOW_DISSECTOR_F_STOP_AT_ENCAP.
869  *
870  * The function will try to retrieve individual keys into target specified
871  * by flow_dissector from either the skbuff or a raw buffer specified by the
872  * rest parameters.
873  *
874  * Caller must take care of zeroing target container memory.
875  */
876 bool __skb_flow_dissect(const struct net *net,
877                         const struct sk_buff *skb,
878                         struct flow_dissector *flow_dissector,
879                         void *target_container,
880                         void *data, __be16 proto, int nhoff, int hlen,
881                         unsigned int flags)
882 {
883         struct flow_dissector_key_control *key_control;
884         struct flow_dissector_key_basic *key_basic;
885         struct flow_dissector_key_addrs *key_addrs;
886         struct flow_dissector_key_ports *key_ports;
887         struct flow_dissector_key_icmp *key_icmp;
888         struct flow_dissector_key_tags *key_tags;
889         struct flow_dissector_key_vlan *key_vlan;
890         struct bpf_prog *attached = NULL;
891         enum flow_dissect_ret fdret;
892         enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
893         int num_hdrs = 0;
894         u8 ip_proto = 0;
895         bool ret;
896
897         if (!data) {
898                 data = skb->data;
899                 proto = skb_vlan_tag_present(skb) ?
900                          skb->vlan_proto : skb->protocol;
901                 nhoff = skb_network_offset(skb);
902                 hlen = skb_headlen(skb);
903 #if IS_ENABLED(CONFIG_NET_DSA)
904                 if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) {
905                         const struct dsa_device_ops *ops;
906                         int offset;
907
908                         ops = skb->dev->dsa_ptr->tag_ops;
909                         if (ops->flow_dissect &&
910                             !ops->flow_dissect(skb, &proto, &offset)) {
911                                 hlen -= offset;
912                                 nhoff += offset;
913                         }
914                 }
915 #endif
916         }
917
918         /* It is ensured by skb_flow_dissector_init() that control key will
919          * be always present.
920          */
921         key_control = skb_flow_dissector_target(flow_dissector,
922                                                 FLOW_DISSECTOR_KEY_CONTROL,
923                                                 target_container);
924
925         /* It is ensured by skb_flow_dissector_init() that basic key will
926          * be always present.
927          */
928         key_basic = skb_flow_dissector_target(flow_dissector,
929                                               FLOW_DISSECTOR_KEY_BASIC,
930                                               target_container);
931
932         if (skb) {
933                 if (!net) {
934                         if (skb->dev)
935                                 net = dev_net(skb->dev);
936                         else if (skb->sk)
937                                 net = sock_net(skb->sk);
938                 }
939         }
940
941         WARN_ON_ONCE(!net);
942         if (net) {
943                 rcu_read_lock();
944                 attached = rcu_dereference(init_net.flow_dissector_prog);
945
946                 if (!attached)
947                         attached = rcu_dereference(net->flow_dissector_prog);
948
949                 if (attached) {
950                         struct bpf_flow_keys flow_keys;
951                         struct bpf_flow_dissector ctx = {
952                                 .flow_keys = &flow_keys,
953                                 .data = data,
954                                 .data_end = data + hlen,
955                         };
956                         __be16 n_proto = proto;
957
958                         if (skb) {
959                                 ctx.skb = skb;
960                                 /* we can't use 'proto' in the skb case
961                                  * because it might be set to skb->vlan_proto
962                                  * which has been pulled from the data
963                                  */
964                                 n_proto = skb->protocol;
965                         }
966
967                         ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
968                                                hlen, flags);
969                         __skb_flow_bpf_to_target(&flow_keys, flow_dissector,
970                                                  target_container);
971                         rcu_read_unlock();
972                         return ret;
973                 }
974                 rcu_read_unlock();
975         }
976
977         if (dissector_uses_key(flow_dissector,
978                                FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
979                 struct ethhdr *eth = eth_hdr(skb);
980                 struct flow_dissector_key_eth_addrs *key_eth_addrs;
981
982                 key_eth_addrs = skb_flow_dissector_target(flow_dissector,
983                                                           FLOW_DISSECTOR_KEY_ETH_ADDRS,
984                                                           target_container);
985                 memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
986         }
987
988 proto_again:
989         fdret = FLOW_DISSECT_RET_CONTINUE;
990
991         switch (proto) {
992         case htons(ETH_P_IP): {
993                 const struct iphdr *iph;
994                 struct iphdr _iph;
995
996                 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
997                 if (!iph || iph->ihl < 5) {
998                         fdret = FLOW_DISSECT_RET_OUT_BAD;
999                         break;
1000                 }
1001
1002                 nhoff += iph->ihl * 4;
1003
1004                 ip_proto = iph->protocol;
1005
1006                 if (dissector_uses_key(flow_dissector,
1007                                        FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
1008                         key_addrs = skb_flow_dissector_target(flow_dissector,
1009                                                               FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1010                                                               target_container);
1011
1012                         memcpy(&key_addrs->v4addrs, &iph->saddr,
1013                                sizeof(key_addrs->v4addrs));
1014                         key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1015                 }
1016
1017                 if (ip_is_fragment(iph)) {
1018                         key_control->flags |= FLOW_DIS_IS_FRAGMENT;
1019
1020                         if (iph->frag_off & htons(IP_OFFSET)) {
1021                                 fdret = FLOW_DISSECT_RET_OUT_GOOD;
1022                                 break;
1023                         } else {
1024                                 key_control->flags |= FLOW_DIS_FIRST_FRAG;
1025                                 if (!(flags &
1026                                       FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) {
1027                                         fdret = FLOW_DISSECT_RET_OUT_GOOD;
1028                                         break;
1029                                 }
1030                         }
1031                 }
1032
1033                 __skb_flow_dissect_ipv4(skb, flow_dissector,
1034                                         target_container, data, iph);
1035
1036                 break;
1037         }
1038         case htons(ETH_P_IPV6): {
1039                 const struct ipv6hdr *iph;
1040                 struct ipv6hdr _iph;
1041
1042                 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
1043                 if (!iph) {
1044                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1045                         break;
1046                 }
1047
1048                 ip_proto = iph->nexthdr;
1049                 nhoff += sizeof(struct ipv6hdr);
1050
1051                 if (dissector_uses_key(flow_dissector,
1052                                        FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
1053                         key_addrs = skb_flow_dissector_target(flow_dissector,
1054                                                               FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1055                                                               target_container);
1056
1057                         memcpy(&key_addrs->v6addrs, &iph->saddr,
1058                                sizeof(key_addrs->v6addrs));
1059                         key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1060                 }
1061
1062                 if ((dissector_uses_key(flow_dissector,
1063                                         FLOW_DISSECTOR_KEY_FLOW_LABEL) ||
1064                      (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) &&
1065                     ip6_flowlabel(iph)) {
1066                         __be32 flow_label = ip6_flowlabel(iph);
1067
1068                         if (dissector_uses_key(flow_dissector,
1069                                                FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
1070                                 key_tags = skb_flow_dissector_target(flow_dissector,
1071                                                                      FLOW_DISSECTOR_KEY_FLOW_LABEL,
1072                                                                      target_container);
1073                                 key_tags->flow_label = ntohl(flow_label);
1074                         }
1075                         if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) {
1076                                 fdret = FLOW_DISSECT_RET_OUT_GOOD;
1077                                 break;
1078                         }
1079                 }
1080
1081                 __skb_flow_dissect_ipv6(skb, flow_dissector,
1082                                         target_container, data, iph);
1083
1084                 break;
1085         }
1086         case htons(ETH_P_8021AD):
1087         case htons(ETH_P_8021Q): {
1088                 const struct vlan_hdr *vlan = NULL;
1089                 struct vlan_hdr _vlan;
1090                 __be16 saved_vlan_tpid = proto;
1091
1092                 if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
1093                     skb && skb_vlan_tag_present(skb)) {
1094                         proto = skb->protocol;
1095                 } else {
1096                         vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
1097                                                     data, hlen, &_vlan);
1098                         if (!vlan) {
1099                                 fdret = FLOW_DISSECT_RET_OUT_BAD;
1100                                 break;
1101                         }
1102
1103                         proto = vlan->h_vlan_encapsulated_proto;
1104                         nhoff += sizeof(*vlan);
1105                 }
1106
1107                 if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
1108                         dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
1109                 } else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
1110                         dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
1111                 } else {
1112                         fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1113                         break;
1114                 }
1115
1116                 if (dissector_uses_key(flow_dissector, dissector_vlan)) {
1117                         key_vlan = skb_flow_dissector_target(flow_dissector,
1118                                                              dissector_vlan,
1119                                                              target_container);
1120
1121                         if (!vlan) {
1122                                 key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
1123                                 key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb);
1124                         } else {
1125                                 key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
1126                                         VLAN_VID_MASK;
1127                                 key_vlan->vlan_priority =
1128                                         (ntohs(vlan->h_vlan_TCI) &
1129                                          VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
1130                         }
1131                         key_vlan->vlan_tpid = saved_vlan_tpid;
1132                 }
1133
1134                 fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1135                 break;
1136         }
1137         case htons(ETH_P_PPP_SES): {
1138                 struct {
1139                         struct pppoe_hdr hdr;
1140                         __be16 proto;
1141                 } *hdr, _hdr;
1142                 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
1143                 if (!hdr) {
1144                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1145                         break;
1146                 }
1147
1148                 proto = hdr->proto;
1149                 nhoff += PPPOE_SES_HLEN;
1150                 switch (proto) {
1151                 case htons(PPP_IP):
1152                         proto = htons(ETH_P_IP);
1153                         fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1154                         break;
1155                 case htons(PPP_IPV6):
1156                         proto = htons(ETH_P_IPV6);
1157                         fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1158                         break;
1159                 default:
1160                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1161                         break;
1162                 }
1163                 break;
1164         }
1165         case htons(ETH_P_TIPC): {
1166                 struct tipc_basic_hdr *hdr, _hdr;
1167
1168                 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr),
1169                                            data, hlen, &_hdr);
1170                 if (!hdr) {
1171                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1172                         break;
1173                 }
1174
1175                 if (dissector_uses_key(flow_dissector,
1176                                        FLOW_DISSECTOR_KEY_TIPC)) {
1177                         key_addrs = skb_flow_dissector_target(flow_dissector,
1178                                                               FLOW_DISSECTOR_KEY_TIPC,
1179                                                               target_container);
1180                         key_addrs->tipckey.key = tipc_hdr_rps_key(hdr);
1181                         key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC;
1182                 }
1183                 fdret = FLOW_DISSECT_RET_OUT_GOOD;
1184                 break;
1185         }
1186
1187         case htons(ETH_P_MPLS_UC):
1188         case htons(ETH_P_MPLS_MC):
1189                 fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
1190                                                 target_container, data,
1191                                                 nhoff, hlen);
1192                 break;
1193         case htons(ETH_P_FCOE):
1194                 if ((hlen - nhoff) < FCOE_HEADER_LEN) {
1195                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1196                         break;
1197                 }
1198
1199                 nhoff += FCOE_HEADER_LEN;
1200                 fdret = FLOW_DISSECT_RET_OUT_GOOD;
1201                 break;
1202
1203         case htons(ETH_P_ARP):
1204         case htons(ETH_P_RARP):
1205                 fdret = __skb_flow_dissect_arp(skb, flow_dissector,
1206                                                target_container, data,
1207                                                nhoff, hlen);
1208                 break;
1209
1210         case htons(ETH_P_BATMAN):
1211                 fdret = __skb_flow_dissect_batadv(skb, key_control, data,
1212                                                   &proto, &nhoff, hlen, flags);
1213                 break;
1214
1215         default:
1216                 fdret = FLOW_DISSECT_RET_OUT_BAD;
1217                 break;
1218         }
1219
1220         /* Process result of proto processing */
1221         switch (fdret) {
1222         case FLOW_DISSECT_RET_OUT_GOOD:
1223                 goto out_good;
1224         case FLOW_DISSECT_RET_PROTO_AGAIN:
1225                 if (skb_flow_dissect_allowed(&num_hdrs))
1226                         goto proto_again;
1227                 goto out_good;
1228         case FLOW_DISSECT_RET_CONTINUE:
1229         case FLOW_DISSECT_RET_IPPROTO_AGAIN:
1230                 break;
1231         case FLOW_DISSECT_RET_OUT_BAD:
1232         default:
1233                 goto out_bad;
1234         }
1235
1236 ip_proto_again:
1237         fdret = FLOW_DISSECT_RET_CONTINUE;
1238
1239         switch (ip_proto) {
1240         case IPPROTO_GRE:
1241                 fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
1242                                                target_container, data,
1243                                                &proto, &nhoff, &hlen, flags);
1244                 break;
1245
1246         case NEXTHDR_HOP:
1247         case NEXTHDR_ROUTING:
1248         case NEXTHDR_DEST: {
1249                 u8 _opthdr[2], *opthdr;
1250
1251                 if (proto != htons(ETH_P_IPV6))
1252                         break;
1253
1254                 opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr),
1255                                               data, hlen, &_opthdr);
1256                 if (!opthdr) {
1257                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1258                         break;
1259                 }
1260
1261                 ip_proto = opthdr[0];
1262                 nhoff += (opthdr[1] + 1) << 3;
1263
1264                 fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
1265                 break;
1266         }
1267         case NEXTHDR_FRAGMENT: {
1268                 struct frag_hdr _fh, *fh;
1269
1270                 if (proto != htons(ETH_P_IPV6))
1271                         break;
1272
1273                 fh = __skb_header_pointer(skb, nhoff, sizeof(_fh),
1274                                           data, hlen, &_fh);
1275
1276                 if (!fh) {
1277                         fdret = FLOW_DISSECT_RET_OUT_BAD;
1278                         break;
1279                 }
1280
1281                 key_control->flags |= FLOW_DIS_IS_FRAGMENT;
1282
1283                 nhoff += sizeof(_fh);
1284                 ip_proto = fh->nexthdr;
1285
1286                 if (!(fh->frag_off & htons(IP6_OFFSET))) {
1287                         key_control->flags |= FLOW_DIS_FIRST_FRAG;
1288                         if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) {
1289                                 fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN;
1290                                 break;
1291                         }
1292                 }
1293
1294                 fdret = FLOW_DISSECT_RET_OUT_GOOD;
1295                 break;
1296         }
1297         case IPPROTO_IPIP:
1298                 proto = htons(ETH_P_IP);
1299
1300                 key_control->flags |= FLOW_DIS_ENCAPSULATION;
1301                 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
1302                         fdret = FLOW_DISSECT_RET_OUT_GOOD;
1303                         break;
1304                 }
1305
1306                 fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1307                 break;
1308
1309         case IPPROTO_IPV6:
1310                 proto = htons(ETH_P_IPV6);
1311
1312                 key_control->flags |= FLOW_DIS_ENCAPSULATION;
1313                 if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) {
1314                         fdret = FLOW_DISSECT_RET_OUT_GOOD;
1315                         break;
1316                 }
1317
1318                 fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1319                 break;
1320
1321
1322         case IPPROTO_MPLS:
1323                 proto = htons(ETH_P_MPLS_UC);
1324                 fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
1325                 break;
1326
1327         case IPPROTO_TCP:
1328                 __skb_flow_dissect_tcp(skb, flow_dissector, target_container,
1329                                        data, nhoff, hlen);
1330                 break;
1331
1332         default:
1333                 break;
1334         }
1335
1336         if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
1337             !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
1338                 key_ports = skb_flow_dissector_target(flow_dissector,
1339                                                       FLOW_DISSECTOR_KEY_PORTS,
1340                                                       target_container);
1341                 key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
1342                                                         data, hlen);
1343         }
1344
1345         if (dissector_uses_key(flow_dissector,
1346                                FLOW_DISSECTOR_KEY_ICMP)) {
1347                 key_icmp = skb_flow_dissector_target(flow_dissector,
1348                                                      FLOW_DISSECTOR_KEY_ICMP,
1349                                                      target_container);
1350                 key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
1351         }
1352
1353         /* Process result of IP proto processing */
1354         switch (fdret) {
1355         case FLOW_DISSECT_RET_PROTO_AGAIN:
1356                 if (skb_flow_dissect_allowed(&num_hdrs))
1357                         goto proto_again;
1358                 break;
1359         case FLOW_DISSECT_RET_IPPROTO_AGAIN:
1360                 if (skb_flow_dissect_allowed(&num_hdrs))
1361                         goto ip_proto_again;
1362                 break;
1363         case FLOW_DISSECT_RET_OUT_GOOD:
1364         case FLOW_DISSECT_RET_CONTINUE:
1365                 break;
1366         case FLOW_DISSECT_RET_OUT_BAD:
1367         default:
1368                 goto out_bad;
1369         }
1370
1371 out_good:
1372         ret = true;
1373
1374 out:
1375         key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
1376         key_basic->n_proto = proto;
1377         key_basic->ip_proto = ip_proto;
1378
1379         return ret;
1380
1381 out_bad:
1382         ret = false;
1383         goto out;
1384 }
1385 EXPORT_SYMBOL(__skb_flow_dissect);
1386
1387 static u32 hashrnd __read_mostly;
1388 static __always_inline void __flow_hash_secret_init(void)
1389 {
1390         net_get_random_once(&hashrnd, sizeof(hashrnd));
1391 }
1392
1393 static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
1394                                              u32 keyval)
1395 {
1396         return jhash2(words, length, keyval);
1397 }
1398
1399 static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
1400 {
1401         const void *p = flow;
1402
1403         BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
1404         return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
1405 }
1406
1407 static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
1408 {
1409         size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
1410         BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
1411         /* flow.addrs MUST be the last member in struct flow_keys because
1412          * different L3 protocols have different address length
1413          */
1414         BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
1415                      sizeof(*flow) - sizeof(flow->addrs));
1416
1417         switch (flow->control.addr_type) {
1418         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1419                 diff -= sizeof(flow->addrs.v4addrs);
1420                 break;
1421         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1422                 diff -= sizeof(flow->addrs.v6addrs);
1423                 break;
1424         case FLOW_DISSECTOR_KEY_TIPC:
1425                 diff -= sizeof(flow->addrs.tipckey);
1426                 break;
1427         }
1428         return (sizeof(*flow) - diff) / sizeof(u32);
1429 }
1430
1431 __be32 flow_get_u32_src(const struct flow_keys *flow)
1432 {
1433         switch (flow->control.addr_type) {
1434         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1435                 return flow->addrs.v4addrs.src;
1436         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1437                 return (__force __be32)ipv6_addr_hash(
1438                         &flow->addrs.v6addrs.src);
1439         case FLOW_DISSECTOR_KEY_TIPC:
1440                 return flow->addrs.tipckey.key;
1441         default:
1442                 return 0;
1443         }
1444 }
1445 EXPORT_SYMBOL(flow_get_u32_src);
1446
1447 __be32 flow_get_u32_dst(const struct flow_keys *flow)
1448 {
1449         switch (flow->control.addr_type) {
1450         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1451                 return flow->addrs.v4addrs.dst;
1452         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1453                 return (__force __be32)ipv6_addr_hash(
1454                         &flow->addrs.v6addrs.dst);
1455         default:
1456                 return 0;
1457         }
1458 }
1459 EXPORT_SYMBOL(flow_get_u32_dst);
1460
1461 /* Sort the source and destination IP (and the ports if the IP are the same),
1462  * to have consistent hash within the two directions
1463  */
1464 static inline void __flow_hash_consistentify(struct flow_keys *keys)
1465 {
1466         int addr_diff, i;
1467
1468         switch (keys->control.addr_type) {
1469         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1470                 addr_diff = (__force u32)keys->addrs.v4addrs.dst -
1471                             (__force u32)keys->addrs.v4addrs.src;
1472                 if ((addr_diff < 0) ||
1473                     (addr_diff == 0 &&
1474                      ((__force u16)keys->ports.dst <
1475                       (__force u16)keys->ports.src))) {
1476                         swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
1477                         swap(keys->ports.src, keys->ports.dst);
1478                 }
1479                 break;
1480         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1481                 addr_diff = memcmp(&keys->addrs.v6addrs.dst,
1482                                    &keys->addrs.v6addrs.src,
1483                                    sizeof(keys->addrs.v6addrs.dst));
1484                 if ((addr_diff < 0) ||
1485                     (addr_diff == 0 &&
1486                      ((__force u16)keys->ports.dst <
1487                       (__force u16)keys->ports.src))) {
1488                         for (i = 0; i < 4; i++)
1489                                 swap(keys->addrs.v6addrs.src.s6_addr32[i],
1490                                      keys->addrs.v6addrs.dst.s6_addr32[i]);
1491                         swap(keys->ports.src, keys->ports.dst);
1492                 }
1493                 break;
1494         }
1495 }
1496
1497 static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
1498 {
1499         u32 hash;
1500
1501         __flow_hash_consistentify(keys);
1502
1503         hash = __flow_hash_words(flow_keys_hash_start(keys),
1504                                  flow_keys_hash_length(keys), keyval);
1505         if (!hash)
1506                 hash = 1;
1507
1508         return hash;
1509 }
1510
1511 u32 flow_hash_from_keys(struct flow_keys *keys)
1512 {
1513         __flow_hash_secret_init();
1514         return __flow_hash_from_keys(keys, hashrnd);
1515 }
1516 EXPORT_SYMBOL(flow_hash_from_keys);
1517
1518 static inline u32 ___skb_get_hash(const struct sk_buff *skb,
1519                                   struct flow_keys *keys, u32 keyval)
1520 {
1521         skb_flow_dissect_flow_keys(skb, keys,
1522                                    FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
1523
1524         return __flow_hash_from_keys(keys, keyval);
1525 }
1526
/* Private layout that make_flow_keys_digest() overlays on a
 * struct flow_keys_digest.
 */
struct _flow_keys_digest_data {
        __be16  n_proto;        /* copied from flow->basic.n_proto */
        u8      ip_proto;       /* copied from flow->basic.ip_proto */
        u8      padding;        /* never written explicitly; stays zero from the memset */
        __be32  ports;          /* copied from flow->ports.ports */
        __be32  src;            /* copied from flow->addrs.v4addrs.src */
        __be32  dst;            /* copied from flow->addrs.v4addrs.dst */
};
1535
1536 void make_flow_keys_digest(struct flow_keys_digest *digest,
1537                            const struct flow_keys *flow)
1538 {
1539         struct _flow_keys_digest_data *data =
1540             (struct _flow_keys_digest_data *)digest;
1541
1542         BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));
1543
1544         memset(digest, 0, sizeof(*digest));
1545
1546         data->n_proto = flow->basic.n_proto;
1547         data->ip_proto = flow->basic.ip_proto;
1548         data->ports = flow->ports.ports;
1549         data->src = flow->addrs.v4addrs.src;
1550         data->dst = flow->addrs.v4addrs.dst;
1551 }
1552 EXPORT_SYMBOL(make_flow_keys_digest);
1553
1554 static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
1555
1556 u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
1557 {
1558         struct flow_keys keys;
1559
1560         __flow_hash_secret_init();
1561
1562         memset(&keys, 0, sizeof(keys));
1563         __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
1564                            &keys, NULL, 0, 0, 0,
1565                            FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
1566
1567         return __flow_hash_from_keys(&keys, hashrnd);
1568 }
1569 EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
1570
1571 /**
1572  * __skb_get_hash: calculate a flow hash
1573  * @skb: sk_buff to calculate flow hash from
1574  *
1575  * This function calculates a flow hash based on src/dst addresses
1576  * and src/dst port numbers.  Sets hash in skb to non-zero hash value
1577  * on success, zero indicates no valid hash.  Also, sets l4_hash in skb
1578  * if hash is a canonical 4-tuple hash over transport ports.
1579  */
1580 void __skb_get_hash(struct sk_buff *skb)
1581 {
1582         struct flow_keys keys;
1583         u32 hash;
1584
1585         __flow_hash_secret_init();
1586
1587         hash = ___skb_get_hash(skb, &keys, hashrnd);
1588
1589         __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
1590 }
1591 EXPORT_SYMBOL(__skb_get_hash);
1592
1593 __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
1594 {
1595         struct flow_keys keys;
1596
1597         return ___skb_get_hash(skb, &keys, perturb);
1598 }
1599 EXPORT_SYMBOL(skb_get_hash_perturb);
1600
1601 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
1602                    const struct flow_keys_basic *keys, int hlen)
1603 {
1604         u32 poff = keys->control.thoff;
1605
1606         /* skip L4 headers for fragments after the first */
1607         if ((keys->control.flags & FLOW_DIS_IS_FRAGMENT) &&
1608             !(keys->control.flags & FLOW_DIS_FIRST_FRAG))
1609                 return poff;
1610
1611         switch (keys->basic.ip_proto) {
1612         case IPPROTO_TCP: {
1613                 /* access doff as u8 to avoid unaligned access */
1614                 const u8 *doff;
1615                 u8 _doff;
1616
1617                 doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
1618                                             data, hlen, &_doff);
1619                 if (!doff)
1620                         return poff;
1621
1622                 poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
1623                 break;
1624         }
1625         case IPPROTO_UDP:
1626         case IPPROTO_UDPLITE:
1627                 poff += sizeof(struct udphdr);
1628                 break;
1629         /* For the rest, we do not really care about header
1630          * extensions at this point for now.
1631          */
1632         case IPPROTO_ICMP:
1633                 poff += sizeof(struct icmphdr);
1634                 break;
1635         case IPPROTO_ICMPV6:
1636                 poff += sizeof(struct icmp6hdr);
1637                 break;
1638         case IPPROTO_IGMP:
1639                 poff += sizeof(struct igmphdr);
1640                 break;
1641         case IPPROTO_DCCP:
1642                 poff += sizeof(struct dccp_hdr);
1643                 break;
1644         case IPPROTO_SCTP:
1645                 poff += sizeof(struct sctphdr);
1646                 break;
1647         }
1648
1649         return poff;
1650 }
1651
1652 /**
1653  * skb_get_poff - get the offset to the payload
1654  * @skb: sk_buff to get the payload offset from
1655  *
1656  * The function will get the offset to the payload as far as it could
1657  * be dissected.  The main user is currently BPF, so that we can dynamically
1658  * truncate packets without needing to push actual payload to the user
1659  * space and can analyze headers only, instead.
1660  */
1661 u32 skb_get_poff(const struct sk_buff *skb)
1662 {
1663         struct flow_keys_basic keys;
1664
1665         if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
1666                                               NULL, 0, 0, 0, 0))
1667                 return 0;
1668
1669         return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
1670 }
1671
1672 __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
1673 {
1674         memset(keys, 0, sizeof(*keys));
1675
1676         memcpy(&keys->addrs.v6addrs.src, &fl6->saddr,
1677             sizeof(keys->addrs.v6addrs.src));
1678         memcpy(&keys->addrs.v6addrs.dst, &fl6->daddr,
1679             sizeof(keys->addrs.v6addrs.dst));
1680         keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1681         keys->ports.src = fl6->fl6_sport;
1682         keys->ports.dst = fl6->fl6_dport;
1683         keys->keyid.keyid = fl6->fl6_gre_key;
1684         keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
1685         keys->basic.ip_proto = fl6->flowi6_proto;
1686
1687         return flow_hash_from_keys(keys);
1688 }
1689 EXPORT_SYMBOL(__get_hash_from_flowi6);
1690
1691 static const struct flow_dissector_key flow_keys_dissector_keys[] = {
1692         {
1693                 .key_id = FLOW_DISSECTOR_KEY_CONTROL,
1694                 .offset = offsetof(struct flow_keys, control),
1695         },
1696         {
1697                 .key_id = FLOW_DISSECTOR_KEY_BASIC,
1698                 .offset = offsetof(struct flow_keys, basic),
1699         },
1700         {
1701                 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1702                 .offset = offsetof(struct flow_keys, addrs.v4addrs),
1703         },
1704         {
1705                 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1706                 .offset = offsetof(struct flow_keys, addrs.v6addrs),
1707         },
1708         {
1709                 .key_id = FLOW_DISSECTOR_KEY_TIPC,
1710                 .offset = offsetof(struct flow_keys, addrs.tipckey),
1711         },
1712         {
1713                 .key_id = FLOW_DISSECTOR_KEY_PORTS,
1714                 .offset = offsetof(struct flow_keys, ports),
1715         },
1716         {
1717                 .key_id = FLOW_DISSECTOR_KEY_VLAN,
1718                 .offset = offsetof(struct flow_keys, vlan),
1719         },
1720         {
1721                 .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
1722                 .offset = offsetof(struct flow_keys, tags),
1723         },
1724         {
1725                 .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
1726                 .offset = offsetof(struct flow_keys, keyid),
1727         },
1728 };
1729
1730 static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = {
1731         {
1732                 .key_id = FLOW_DISSECTOR_KEY_CONTROL,
1733                 .offset = offsetof(struct flow_keys, control),
1734         },
1735         {
1736                 .key_id = FLOW_DISSECTOR_KEY_BASIC,
1737                 .offset = offsetof(struct flow_keys, basic),
1738         },
1739         {
1740                 .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
1741                 .offset = offsetof(struct flow_keys, addrs.v4addrs),
1742         },
1743         {
1744                 .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
1745                 .offset = offsetof(struct flow_keys, addrs.v6addrs),
1746         },
1747         {
1748                 .key_id = FLOW_DISSECTOR_KEY_PORTS,
1749                 .offset = offsetof(struct flow_keys, ports),
1750         },
1751 };
1752
1753 static const struct flow_dissector_key flow_keys_basic_dissector_keys[] = {
1754         {
1755                 .key_id = FLOW_DISSECTOR_KEY_CONTROL,
1756                 .offset = offsetof(struct flow_keys, control),
1757         },
1758         {
1759                 .key_id = FLOW_DISSECTOR_KEY_BASIC,
1760                 .offset = offsetof(struct flow_keys, basic),
1761         },
1762 };
1763
/* Exported dissector that init_default_flow_dissectors() sets up from
 * flow_keys_dissector_keys.
 */
struct flow_dissector flow_keys_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_dissector);

/* Exported dissector that init_default_flow_dissectors() sets up from
 * flow_keys_basic_dissector_keys (control and basic keys only).
 */
struct flow_dissector flow_keys_basic_dissector __read_mostly;
EXPORT_SYMBOL(flow_keys_basic_dissector);
1769
1770 static int __init init_default_flow_dissectors(void)
1771 {
1772         skb_flow_dissector_init(&flow_keys_dissector,
1773                                 flow_keys_dissector_keys,
1774                                 ARRAY_SIZE(flow_keys_dissector_keys));
1775         skb_flow_dissector_init(&flow_keys_dissector_symmetric,
1776                                 flow_keys_dissector_symmetric_keys,
1777                                 ARRAY_SIZE(flow_keys_dissector_symmetric_keys));
1778         skb_flow_dissector_init(&flow_keys_basic_dissector,
1779                                 flow_keys_basic_dissector_keys,
1780                                 ARRAY_SIZE(flow_keys_basic_dissector_keys));
1781         return 0;
1782 }
1783
1784 core_initcall(init_default_flow_dissectors);