Commit | Line | Data |
---|---|---|
187d0738 | 1 | /* SPDX-License-Identifier: GPL-2.0 |
2 | * | |
3 | * Copyright (c) 2019 Facebook | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * Include file for sample Host Bandwidth Manager (HBM) BPF programs | |
10 | */ | |
11 | #define KBUILD_MODNAME "foo" | |
187d0738 | 12 | #include <uapi/linux/bpf.h> |
13 | #include <uapi/linux/if_ether.h> | |
14 | #include <uapi/linux/if_packet.h> | |
15 | #include <uapi/linux/ip.h> | |
16 | #include <uapi/linux/ipv6.h> | |
17 | #include <uapi/linux/in.h> | |
18 | #include <uapi/linux/tcp.h> | |
19 | #include <uapi/linux/filter.h> | |
20 | #include <uapi/linux/pkt_cls.h> | |
21 | #include <net/ipv6.h> | |
22 | #include <net/inet_ecn.h> | |
7cf245a3 THJ |
23 | #include <bpf/bpf_endian.h> |
24 | #include <bpf/bpf_helpers.h> | |
187d0738 | 25 | #include "hbm.h" |
26 | ||
/* Small integer constants; hbm_update_stats() uses DROP_PKT and ALLOW_PKT. */
#define DROP_PKT	0
#define ALLOW_PKT	1
#define TCP_ECN_OK	1
#define CWR		2

/* Unless HBM_DEBUG is defined, bpf_printk() calls expand to nothing. */
#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif

/* Thresholds expressed as multiples of MAX_BYTES_PER_PACKET. */
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)

// Time-based accounting for fq's EDT
#define BURST_SIZE_NS		100000	// 100us
#define MARK_THRESH_NS		50000	// 50us
#define DROP_THRESH_NS		500000	// 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)

// rate in bytes per ns << 20
// CREDIT_PER_NS/BYTES_PER_NS: (delta * rate) >> 20, computed in 64 bits.
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) CREDIT_PER_NS(delta, rate)
// BYTES_TO_NS: inverse conversion, (bytes << 20) / rate.
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
187d0738 | 59 | |
36b5d471 AN |
60 | struct { |
61 | __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); | |
62 | __type(key, struct bpf_cgroup_storage_key); | |
63 | __type(value, struct hbm_vqueue); | |
64 | } queue_state SEC(".maps"); | |
187d0738 | 65 | |
36b5d471 AN |
66 | struct { |
67 | __uint(type, BPF_MAP_TYPE_ARRAY); | |
68 | __uint(max_entries, 1); | |
69 | __type(key, u32); | |
c5815ac7 | 70 | __type(value, struct hbm_queue_stats); |
36b5d471 | 71 | } queue_stats SEC(".maps"); |
187d0738 | 72 | |
/*
 * Per-packet information filled in by hbm_get_pkt_info(); the TCP socket
 * fields (cwnd, rtt, packets_out) are written by get_tcp_info().
 */
struct hbm_pkt_info {
	int	cwnd;		/* tp->snd_cwnd, or 0 when unavailable */
	int	rtt;		/* tp->srtt_us >> 3, or 0 when unavailable */
	int	packets_out;	/* tp->packets_out, or 0 when get_tcp_info()
				 * finds no TCP socket
				 */
	bool	is_ip;		/* header version field is 4 or 6 */
	bool	is_tcp;		/* IPv4 protocol / IPv6 nexthdr is 6 */
	short	ecn;		/* header ECN bits, masked with INET_ECN_MASK */
};
81 | ||
d58c6f72 | 82 | static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) |
83 | { | |
84 | struct bpf_sock *sk; | |
85 | struct bpf_tcp_sock *tp; | |
86 | ||
87 | sk = skb->sk; | |
88 | if (sk) { | |
89 | sk = bpf_sk_fullsock(sk); | |
90 | if (sk) { | |
91 | if (sk->protocol == IPPROTO_TCP) { | |
92 | tp = bpf_tcp_sock(sk); | |
93 | if (tp) { | |
94 | pkti->cwnd = tp->snd_cwnd; | |
95 | pkti->rtt = tp->srtt_us >> 3; | |
71634d7f | 96 | pkti->packets_out = tp->packets_out; |
d58c6f72 | 97 | return 0; |
98 | } | |
99 | } | |
100 | } | |
101 | } | |
71634d7f | 102 | pkti->cwnd = 0; |
103 | pkti->rtt = 0; | |
104 | pkti->packets_out = 0; | |
d58c6f72 | 105 | return 1; |
106 | } | |
107 | ||
71634d7f | 108 | static void hbm_get_pkt_info(struct __sk_buff *skb, |
109 | struct hbm_pkt_info *pkti) | |
187d0738 | 110 | { |
111 | struct iphdr iph; | |
112 | struct ipv6hdr *ip6h; | |
113 | ||
d58c6f72 | 114 | pkti->cwnd = 0; |
115 | pkti->rtt = 0; | |
187d0738 | 116 | bpf_skb_load_bytes(skb, 0, &iph, 12); |
117 | if (iph.version == 6) { | |
118 | ip6h = (struct ipv6hdr *)&iph; | |
119 | pkti->is_ip = true; | |
120 | pkti->is_tcp = (ip6h->nexthdr == 6); | |
121 | pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; | |
122 | } else if (iph.version == 4) { | |
123 | pkti->is_ip = true; | |
124 | pkti->is_tcp = (iph.protocol == 6); | |
125 | pkti->ecn = iph.tos & INET_ECN_MASK; | |
126 | } else { | |
127 | pkti->is_ip = false; | |
128 | pkti->is_tcp = false; | |
129 | pkti->ecn = 0; | |
130 | } | |
d58c6f72 | 131 | if (pkti->is_tcp) |
132 | get_tcp_info(skb, pkti); | |
187d0738 | 133 | } |
134 | ||
135 | static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) | |
136 | { | |
71634d7f | 137 | bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); |
138 | qdp->lasttime = bpf_ktime_get_ns(); | |
139 | qdp->credit = INIT_CREDIT; | |
140 | qdp->rate = rate * 128; | |
141 | } | |
142 | ||
143 | static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, | |
144 | int rate) | |
145 | { | |
146 | unsigned long long curtime; | |
147 | ||
148 | curtime = bpf_ktime_get_ns(); | |
149 | bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); | |
150 | qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst | |
151 | qdp->credit = 0; // not used | |
152 | qdp->rate = rate * 128; | |
187d0738 | 153 | } |
154 | ||
155 | static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, | |
156 | int len, | |
157 | unsigned long long curtime, | |
158 | bool congestion_flag, | |
d58c6f72 | 159 | bool drop_flag, |
160 | bool cwr_flag, | |
161 | bool ecn_ce_flag, | |
162 | struct hbm_pkt_info *pkti, | |
163 | int credit) | |
187d0738 | 164 | { |
d58c6f72 | 165 | int rv = ALLOW_PKT; |
166 | ||
187d0738 | 167 | if (qsp != NULL) { |
168 | // Following is needed for work conserving | |
169 | __sync_add_and_fetch(&(qsp->bytes_total), len); | |
170 | if (qsp->stats) { | |
171 | // Optionally update statistics | |
172 | if (qsp->firstPacketTime == 0) | |
173 | qsp->firstPacketTime = curtime; | |
174 | qsp->lastPacketTime = curtime; | |
175 | __sync_add_and_fetch(&(qsp->pkts_total), 1); | |
d58c6f72 | 176 | if (congestion_flag) { |
187d0738 | 177 | __sync_add_and_fetch(&(qsp->pkts_marked), 1); |
178 | __sync_add_and_fetch(&(qsp->bytes_marked), len); | |
179 | } | |
180 | if (drop_flag) { | |
181 | __sync_add_and_fetch(&(qsp->pkts_dropped), 1); | |
182 | __sync_add_and_fetch(&(qsp->bytes_dropped), | |
183 | len); | |
184 | } | |
d58c6f72 | 185 | if (ecn_ce_flag) |
186 | __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); | |
187 | if (pkti->cwnd) { | |
188 | __sync_add_and_fetch(&(qsp->sum_cwnd), | |
189 | pkti->cwnd); | |
190 | __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); | |
191 | } | |
192 | if (pkti->rtt) | |
193 | __sync_add_and_fetch(&(qsp->sum_rtt), | |
194 | pkti->rtt); | |
195 | __sync_add_and_fetch(&(qsp->sum_credit), credit); | |
196 | ||
197 | if (drop_flag) | |
198 | rv = DROP_PKT; | |
199 | if (cwr_flag) | |
200 | rv |= 2; | |
201 | if (rv == DROP_PKT) | |
202 | __sync_add_and_fetch(&(qsp->returnValCount[0]), | |
203 | 1); | |
204 | else if (rv == ALLOW_PKT) | |
205 | __sync_add_and_fetch(&(qsp->returnValCount[1]), | |
206 | 1); | |
207 | else if (rv == 2) | |
208 | __sync_add_and_fetch(&(qsp->returnValCount[2]), | |
209 | 1); | |
210 | else if (rv == 3) | |
211 | __sync_add_and_fetch(&(qsp->returnValCount[3]), | |
212 | 1); | |
187d0738 | 213 | } |
214 | } | |
215 | } |