Commit | Line | Data |
---|---|---|
187d0738 | 1 | /* SPDX-License-Identifier: GPL-2.0 |
2 | * | |
3 | * Copyright (c) 2019 Facebook | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * Include file for sample Host Bandwidth Manager (HBM) BPF programs | |
10 | */ | |
11 | #define KBUILD_MODNAME "foo" | |
12 | #include <stddef.h> | |
13 | #include <stdbool.h> | |
14 | #include <uapi/linux/bpf.h> | |
15 | #include <uapi/linux/if_ether.h> | |
16 | #include <uapi/linux/if_packet.h> | |
17 | #include <uapi/linux/ip.h> | |
18 | #include <uapi/linux/ipv6.h> | |
19 | #include <uapi/linux/in.h> | |
20 | #include <uapi/linux/tcp.h> | |
21 | #include <uapi/linux/filter.h> | |
22 | #include <uapi/linux/pkt_cls.h> | |
23 | #include <net/ipv6.h> | |
24 | #include <net/inet_ecn.h> | |
25 | #include "bpf_endian.h" | |
26 | #include "bpf_helpers.h" | |
27 | #include "hbm.h" | |
28 | ||
/* Verdict / flag values used by the HBM programs. */
#define DROP_PKT	0	/* verdict value: drop */
#define ALLOW_PKT	1	/* verdict value: allow */
#define TCP_ECN_OK	1
#define CWR		2	/* flag bit OR-ed on top of DROP_PKT/ALLOW_PKT */
187d0738 | 33 | |
/*
 * Debug tracing switch: unless HBM_DEBUG is defined, any existing
 * bpf_printk definition is dropped and replaced by a macro that expands
 * to nothing (its arguments are therefore never evaluated).
 */
#if !defined(HBM_DEBUG)
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
38 | ||
/*
 * Credit-based accounting constants. All thresholds are expressed in bytes,
 * as multiples of MAX_BYTES_PER_PACKET.
 */
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
/* Large-packet drop threshold sits 15 full packets below DROP_THRESH */
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
/* Span between the mark threshold and the large-packet drop threshold */
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
48 | ||
// Time base accounting for fq's EDT; every value below is in nanoseconds
#define BURST_SIZE_NS			100000	// 100us
#define MARK_THRESH_NS			50000	// 50us
#define DROP_THRESH_NS			500000	// 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS	(DROP_THRESH_NS - 20000)
// Span between the mark threshold and the large-packet drop threshold
#define MARK_REGION_SIZE_NS		(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
56 | ||
/*
 * Rate conversions. "rate" is in bytes per ns, scaled up by 2^20
 * (i.e. bytes per ns << 20), so products are shifted back down by 20 and
 * byte counts are shifted up by 20 before being divided by the rate.
 *
 * CREDIT_PER_NS / BYTES_PER_NS: bytes corresponding to "delta" ns at "rate".
 * BYTES_TO_NS: ns corresponding to "bytes" at "rate" (64-bit division).
 */
#define CREDIT_PER_NS(delta, rate)	(((u64)(delta) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate)	(((u64)(delta) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate)	div64_u64((u64)(bytes) << 20, (u64)(rate))
187d0738 | 61 | |
62 | struct bpf_map_def SEC("maps") queue_state = { | |
63 | .type = BPF_MAP_TYPE_CGROUP_STORAGE, | |
64 | .key_size = sizeof(struct bpf_cgroup_storage_key), | |
65 | .value_size = sizeof(struct hbm_vqueue), | |
66 | }; | |
67 | BPF_ANNOTATE_KV_PAIR(queue_state, struct bpf_cgroup_storage_key, | |
68 | struct hbm_vqueue); | |
69 | ||
70 | struct bpf_map_def SEC("maps") queue_stats = { | |
71 | .type = BPF_MAP_TYPE_ARRAY, | |
72 | .key_size = sizeof(u32), | |
73 | .value_size = sizeof(struct hbm_queue_stats), | |
74 | .max_entries = 1, | |
75 | }; | |
76 | BPF_ANNOTATE_KV_PAIR(queue_stats, int, struct hbm_queue_stats); | |
77 | ||
/* Per-packet facts gathered by hbm_get_pkt_info() / get_tcp_info(). */
struct hbm_pkt_info {
	int	cwnd;		/* tp->snd_cwnd, or 0 when no TCP socket info */
	int	rtt;		/* tp->srtt_us >> 3, or 0 when unavailable */
	int	packets_out;	/* tp->packets_out, or 0 when unavailable */
	bool	is_ip;		/* IP version field is 4 or 6 */
	bool	is_tcp;		/* protocol / next header is TCP */
	short	ecn;		/* ECN bits (masked with INET_ECN_MASK) */
};
86 | ||
d58c6f72 | 87 | static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) |
88 | { | |
89 | struct bpf_sock *sk; | |
90 | struct bpf_tcp_sock *tp; | |
91 | ||
92 | sk = skb->sk; | |
93 | if (sk) { | |
94 | sk = bpf_sk_fullsock(sk); | |
95 | if (sk) { | |
96 | if (sk->protocol == IPPROTO_TCP) { | |
97 | tp = bpf_tcp_sock(sk); | |
98 | if (tp) { | |
99 | pkti->cwnd = tp->snd_cwnd; | |
100 | pkti->rtt = tp->srtt_us >> 3; | |
71634d7f | 101 | pkti->packets_out = tp->packets_out; |
d58c6f72 | 102 | return 0; |
103 | } | |
104 | } | |
105 | } | |
106 | } | |
71634d7f | 107 | pkti->cwnd = 0; |
108 | pkti->rtt = 0; | |
109 | pkti->packets_out = 0; | |
d58c6f72 | 110 | return 1; |
111 | } | |
112 | ||
71634d7f | 113 | static void hbm_get_pkt_info(struct __sk_buff *skb, |
114 | struct hbm_pkt_info *pkti) | |
187d0738 | 115 | { |
116 | struct iphdr iph; | |
117 | struct ipv6hdr *ip6h; | |
118 | ||
d58c6f72 | 119 | pkti->cwnd = 0; |
120 | pkti->rtt = 0; | |
187d0738 | 121 | bpf_skb_load_bytes(skb, 0, &iph, 12); |
122 | if (iph.version == 6) { | |
123 | ip6h = (struct ipv6hdr *)&iph; | |
124 | pkti->is_ip = true; | |
125 | pkti->is_tcp = (ip6h->nexthdr == 6); | |
126 | pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; | |
127 | } else if (iph.version == 4) { | |
128 | pkti->is_ip = true; | |
129 | pkti->is_tcp = (iph.protocol == 6); | |
130 | pkti->ecn = iph.tos & INET_ECN_MASK; | |
131 | } else { | |
132 | pkti->is_ip = false; | |
133 | pkti->is_tcp = false; | |
134 | pkti->ecn = 0; | |
135 | } | |
d58c6f72 | 136 | if (pkti->is_tcp) |
137 | get_tcp_info(skb, pkti); | |
187d0738 | 138 | } |
139 | ||
140 | static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) | |
141 | { | |
71634d7f | 142 | bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); |
143 | qdp->lasttime = bpf_ktime_get_ns(); | |
144 | qdp->credit = INIT_CREDIT; | |
145 | qdp->rate = rate * 128; | |
146 | } | |
147 | ||
148 | static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, | |
149 | int rate) | |
150 | { | |
151 | unsigned long long curtime; | |
152 | ||
153 | curtime = bpf_ktime_get_ns(); | |
154 | bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); | |
155 | qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst | |
156 | qdp->credit = 0; // not used | |
157 | qdp->rate = rate * 128; | |
187d0738 | 158 | } |
159 | ||
160 | static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, | |
161 | int len, | |
162 | unsigned long long curtime, | |
163 | bool congestion_flag, | |
d58c6f72 | 164 | bool drop_flag, |
165 | bool cwr_flag, | |
166 | bool ecn_ce_flag, | |
167 | struct hbm_pkt_info *pkti, | |
168 | int credit) | |
187d0738 | 169 | { |
d58c6f72 | 170 | int rv = ALLOW_PKT; |
171 | ||
187d0738 | 172 | if (qsp != NULL) { |
173 | // Following is needed for work conserving | |
174 | __sync_add_and_fetch(&(qsp->bytes_total), len); | |
175 | if (qsp->stats) { | |
176 | // Optionally update statistics | |
177 | if (qsp->firstPacketTime == 0) | |
178 | qsp->firstPacketTime = curtime; | |
179 | qsp->lastPacketTime = curtime; | |
180 | __sync_add_and_fetch(&(qsp->pkts_total), 1); | |
d58c6f72 | 181 | if (congestion_flag) { |
187d0738 | 182 | __sync_add_and_fetch(&(qsp->pkts_marked), 1); |
183 | __sync_add_and_fetch(&(qsp->bytes_marked), len); | |
184 | } | |
185 | if (drop_flag) { | |
186 | __sync_add_and_fetch(&(qsp->pkts_dropped), 1); | |
187 | __sync_add_and_fetch(&(qsp->bytes_dropped), | |
188 | len); | |
189 | } | |
d58c6f72 | 190 | if (ecn_ce_flag) |
191 | __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); | |
192 | if (pkti->cwnd) { | |
193 | __sync_add_and_fetch(&(qsp->sum_cwnd), | |
194 | pkti->cwnd); | |
195 | __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); | |
196 | } | |
197 | if (pkti->rtt) | |
198 | __sync_add_and_fetch(&(qsp->sum_rtt), | |
199 | pkti->rtt); | |
200 | __sync_add_and_fetch(&(qsp->sum_credit), credit); | |
201 | ||
202 | if (drop_flag) | |
203 | rv = DROP_PKT; | |
204 | if (cwr_flag) | |
205 | rv |= 2; | |
206 | if (rv == DROP_PKT) | |
207 | __sync_add_and_fetch(&(qsp->returnValCount[0]), | |
208 | 1); | |
209 | else if (rv == ALLOW_PKT) | |
210 | __sync_add_and_fetch(&(qsp->returnValCount[1]), | |
211 | 1); | |
212 | else if (rv == 2) | |
213 | __sync_add_and_fetch(&(qsp->returnValCount[2]), | |
214 | 1); | |
215 | else if (rv == 3) | |
216 | __sync_add_and_fetch(&(qsp->returnValCount[3]), | |
217 | 1); | |
187d0738 | 218 | } |
219 | } | |
220 | } |