/* SPDX-License-Identifier: GPL-2.0-only */
/* include/net/xdp.h
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#ifndef __LINUX_NET_XDP_H__
#define __LINUX_NET_XDP_H__

#include <linux/skbuff.h> /* skb_shared_info */
#include <uapi/linux/netdev.h>

/**
 * DOC: XDP RX-queue information
 *
 * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
 * level RX-ring queues.  It is information that is specific to how
 * the driver has configured a given RX-ring queue.
 *
 * Each xdp_buff frame received in the driver carries a (pointer)
 * reference to this xdp_rxq_info structure.  This provides the XDP
 * data-path read-access to RX-info for both kernel and bpf-side
 * (limited subset).
 *
 * For now, direct access is only safe while running in NAPI/softirq
 * context.  Contents are read-mostly and must not be updated during
 * driver NAPI/softirq poll.
 *
 * The driver usage API is a register and unregister API.
 *
 * The struct is not directly tied to the XDP prog.  A new XDP prog
 * can be attached as long as it doesn't change the underlying
 * RX-ring.  If the RX-ring does change significantly, the NIC driver
 * naturally needs to stop the RX-ring before purging and reallocating
 * memory.  In that process the driver MUST call unregister (which
 * also applies for driver shutdown and unload).  The register API is
 * also mandatory during RX-ring setup.
 */
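
/* A minimal sketch of the register/unregister lifecycle described
 * above, as a hypothetical driver might follow it during RX-ring
 * setup and teardown (rxq->xdp_rxq, rxq->page_pool and the error
 * labels are illustrative assumptions, not part of this API):
 *
 *	err = xdp_rxq_info_reg(&rxq->xdp_rxq, netdev, queue_idx, napi_id);
 *	if (err)
 *		goto err_ring;
 *	err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
 *					 MEM_TYPE_PAGE_POOL, rxq->page_pool);
 *	if (err)
 *		goto err_unreg;
 *
 * On ring teardown, driver shutdown or unload:
 *
 *	xdp_rxq_info_unreg(&rxq->xdp_rxq);
 */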
38 | ||
5ab073ff JDB |
39 | enum xdp_mem_type { |
40 | MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */ | |
41 | MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */ | |
57d0a1c1 | 42 | MEM_TYPE_PAGE_POOL, |
2b43470a | 43 | MEM_TYPE_XSK_BUFF_POOL, |
5ab073ff JDB |
44 | MEM_TYPE_MAX, |
45 | }; | |
46 | ||
d3d854fd JK |
47 | typedef u32 xdp_features_t; |
48 | ||
42b33468 | 49 | /* XDP flags for ndo_xdp_xmit */ |
42b33468 JDB |
50 | #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ |
51 | #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH | |
52 | ||
5ab073ff JDB |
53 | struct xdp_mem_info { |
54 | u32 type; /* enum xdp_mem_type, but known size type */ | |
8d5d8852 | 55 | u32 id; |
5ab073ff JDB |
56 | }; |
57 | ||
57d0a1c1 JDB |
58 | struct page_pool; |
59 | ||
aecd67b6 JDB |
60 | struct xdp_rxq_info { |
61 | struct net_device *dev; | |
62 | u32 queue_index; | |
63 | u32 reg_state; | |
5ab073ff | 64 | struct xdp_mem_info mem; |
b02e5a0e | 65 | unsigned int napi_id; |
bf25146a | 66 | u32 frag_size; |
aecd67b6 JDB |
67 | } ____cacheline_aligned; /* perf critical, avoid false-sharing */ |
68 | ||
64b59025 DA |
69 | struct xdp_txq_info { |
70 | struct net_device *dev; | |
71 | }; | |
72 | ||
2e88d4ff | 73 | enum xdp_buff_flags { |
d65a1906 LB |
74 | XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ |
75 | XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under | |
76 | * pressure | |
77 | */ | |
2e88d4ff LB |
78 | }; |
79 | ||
106ca27f JDB |
80 | struct xdp_buff { |
81 | void *data; | |
82 | void *data_end; | |
83 | void *data_meta; | |
84 | void *data_hard_start; | |
85 | struct xdp_rxq_info *rxq; | |
64b59025 | 86 | struct xdp_txq_info *txq; |
f95f0f95 | 87 | u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ |
2e88d4ff | 88 | u32 flags; /* supported values defined in xdp_buff_flags */ |
106ca27f | 89 | }; |

static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
{
	return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
}

static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
{
	xdp->flags |= XDP_FLAGS_HAS_FRAGS;
}

static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp)
{
	xdp->flags &= ~XDP_FLAGS_HAS_FRAGS;
}

static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp)
{
	return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}

static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
{
	xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}

static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
	xdp->frame_sz = frame_sz;
	xdp->rxq = rxq;
	xdp->flags = 0;
}

static __always_inline void
xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start,
		 int headroom, int data_len, const bool meta_valid)
{
	unsigned char *data = hard_start + headroom;

	xdp->data_hard_start = hard_start;
	xdp->data = data;
	xdp->data_end = data + data_len;
	xdp->data_meta = meta_valid ? data : data + 1;
}
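
/* Sketch of how a driver RX path might use the two init helpers above
 * (page, pkt_len, the PAGE_SIZE frame size and the metadata choice are
 * illustrative assumptions):
 *
 *	struct xdp_buff xdp;
 *
 *	xdp_init_buff(&xdp, PAGE_SIZE, &rxq->xdp_rxq);
 *	xdp_prepare_buff(&xdp, page_address(page), XDP_PACKET_HEADROOM,
 *			 pkt_len, true);
 *
 * Passing meta_valid == false sets data_meta to data + 1, the same
 * "invalid" marker used by xdp_set_data_meta_invalid() further below.
 */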

/* Reserve memory area at end-of data area.
 *
 * This macro reserves tailroom in the XDP buffer by limiting the
 * XDP/BPF data access to data_hard_end.  Notice same area (and size)
 * is used for XDP_PASS, when constructing the SKB via build_skb().
 */
#define xdp_data_hard_end(xdp)				\
	((xdp)->data_hard_start + (xdp)->frame_sz -	\
	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
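
/* The reserved tailroom is what makes the XDP_PASS case work: a frame
 * laid out as [headroom][data][tailroom], with frame_sz covering the
 * whole area, can be handed to build_skb(), which places its
 * skb_shared_info in exactly the region the macro excludes.  A minimal
 * sketch, assuming a page-backed buffer prepared as above:
 *
 *	struct sk_buff *skb = build_skb(xdp.data_hard_start, xdp.frame_sz);
 *
 *	skb_reserve(skb, xdp.data - xdp.data_hard_start);
 *	skb_put(skb, xdp.data_end - xdp.data);
 */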

static inline struct skb_shared_info *
xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
{
	return (struct skb_shared_info *)xdp_data_hard_end(xdp);
}

static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp)
{
	unsigned int len = xdp->data_end - xdp->data;
	struct skb_shared_info *sinfo;

	if (likely(!xdp_buff_has_frags(xdp)))
		goto out;

	sinfo = xdp_get_shared_info_from_buff(xdp);
	len += sinfo->xdp_frags_size;
out:
	return len;
}

struct xdp_frame {
	void *data;
	u16 len;
	u16 headroom;
	u32 metasize; /* uses lower 8-bits */
	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
	 * while mem info is valid on remote CPU.
	 */
	struct xdp_mem_info mem;
	struct net_device *dev_rx; /* used by cpumap */
	u32 frame_sz;
	u32 flags; /* supported values defined in enum xdp_buff_flags */
};

static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame)
{
	return !!(frame->flags & XDP_FLAGS_HAS_FRAGS);
}

static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame)
{
	return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC);
}

#define XDP_BULK_QUEUE_SIZE	16
struct xdp_frame_bulk {
	int count;
	void *xa;
	void *q[XDP_BULK_QUEUE_SIZE];
};

static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
{
	/* bq->count will be zero'ed when bq->xa gets updated */
	bq->xa = NULL;
}
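
/* Sketch of the intended bulk-return pattern: during e.g. a
 * TX-completion loop, frames are queued into an on-stack
 * xdp_frame_bulk and flushed once at the end, amortizing the
 * per-frame return cost (the loop structure is an illustrative
 * assumption; the two helpers are declared further below):
 *
 *	struct xdp_frame_bulk bq;
 *
 *	xdp_frame_bulk_init(&bq);
 *	for each completed frame xdpf:
 *		xdp_return_frame_bulk(xdpf, &bq);
 *	xdp_flush_frame_bulk(&bq);
 */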

static inline struct skb_shared_info *
xdp_get_shared_info_from_frame(struct xdp_frame *frame)
{
	void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);

	return (struct skb_shared_info *)(data_hard_start + frame->frame_sz -
				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
}

struct xdp_cpumap_stats {
	unsigned int redirect;
	unsigned int pass;
	unsigned int drop;
};

/* Clear kernel pointers in xdp_frame */
static inline void xdp_scrub_frame(struct xdp_frame *frame)
{
	frame->data = NULL;
	frame->dev_rx = NULL;
}

static inline void
xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
			   unsigned int size, unsigned int truesize,
			   bool pfmemalloc)
{
	skb_shinfo(skb)->nr_frags = nr_frags;

	skb->len += size;
	skb->data_len += size;
	skb->truesize += truesize;
	skb->pfmemalloc |= pfmemalloc;
}

/* Avoids inlining WARN macro in fast-path */
void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)

struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					   struct sk_buff *skb,
					   struct net_device *dev);
struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					 struct net_device *dev);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);

static inline
void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
{
	xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
	xdp->data = frame->data;
	xdp->data_end = frame->data + frame->len;
	xdp->data_meta = frame->data - frame->metasize;
	xdp->frame_sz = frame->frame_sz;
	xdp->flags = frame->flags;
}

static inline
int xdp_update_frame_from_buff(struct xdp_buff *xdp,
			       struct xdp_frame *xdp_frame)
{
	int metasize, headroom;

	/* Assure headroom is available for storing info */
	headroom = xdp->data - xdp->data_hard_start;
	metasize = xdp->data - xdp->data_meta;
	metasize = metasize > 0 ? metasize : 0;
	if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
		return -ENOSPC;

	/* Catch if driver didn't reserve tailroom for skb_shared_info */
	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
		XDP_WARN("Driver BUG: missing reserved tailroom");
		return -ENOSPC;
	}

	xdp_frame->data = xdp->data;
	xdp_frame->len = xdp->data_end - xdp->data;
	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
	xdp_frame->metasize = metasize;
	xdp_frame->frame_sz = xdp->frame_sz;
	xdp_frame->flags = xdp->flags;

	return 0;
}

/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
{
	struct xdp_frame *xdp_frame;

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
		return xdp_convert_zc_to_xdp_frame(xdp);

	/* Store info in top of packet */
	xdp_frame = xdp->data_hard_start;
	if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0))
		return NULL;

	/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
	xdp_frame->mem = xdp->rxq->mem;

	return xdp_frame;
}
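
/* The conversion stores struct xdp_frame inside the packet's own
 * headroom, so the frame survives after the NAPI poll ends and the
 * rxq pointer becomes invalid.  A sketch of typical use in a redirect
 * or ndo_xdp_xmit-style path (the drop label is an illustrative
 * assumption):
 *
 *	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(&xdp);
 *
 *	if (unlikely(!xdpf))
 *		goto drop;
 *
 * A NULL return means insufficient headroom or tailroom.  The
 * returned xdpf can then be queued, e.g. towards ndo_xdp_xmit() or a
 * cpumap/devmap, and later freed with xdp_return_frame().
 */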

void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
		  struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq);

/* When sending an xdp_frame into the network stack, there is no
 * return point callback, which is needed to release e.g. DMA-mapping
 * resources with page_pool.  Thus, have an explicit function to
 * release frame resources.
 */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
static inline void xdp_release_frame(struct xdp_frame *xdpf)
{
	struct xdp_mem_info *mem = &xdpf->mem;
	struct skb_shared_info *sinfo;
	int i;

	/* Currently only page_pool needs this */
	if (mem->type != MEM_TYPE_PAGE_POOL)
		return;

	if (likely(!xdp_frame_has_frags(xdpf)))
		goto out;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	for (i = 0; i < sinfo->nr_frags; i++) {
		struct page *page = skb_frag_page(&sinfo->frags[i]);

		__xdp_release_frame(page_address(page), mem);
	}
out:
	__xdp_release_frame(xdpf->data, mem);
}

static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
{
	struct skb_shared_info *sinfo;
	unsigned int len = xdpf->len;

	if (likely(!xdp_frame_has_frags(xdpf)))
		goto out;

	sinfo = xdp_get_shared_info_from_frame(xdpf);
	len += sinfo->xdp_frags_size;
out:
	return len;
}

int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		       struct net_device *dev, u32 queue_index,
		       unsigned int napi_id, u32 frag_size);
static inline int
xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		 struct net_device *dev, u32 queue_index,
		 unsigned int napi_id)
{
	return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}
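
/* Multi-buffer capable drivers can pass a non-zero frag_size to
 * __xdp_rxq_info_reg(); the xdp_rxq_info_reg() wrapper above simply
 * passes 0 for drivers that don't use fragments.  A sketch (the
 * PAGE_SIZE choice is an illustrative assumption, not a requirement):
 *
 *	err = __xdp_rxq_info_reg(&rxq->xdp_rxq, netdev, queue_idx,
 *				 napi_id, PAGE_SIZE);
 */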

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator);
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
int xdp_reg_mem_model(struct xdp_mem_info *mem,
		      enum xdp_mem_type type, void *allocator);
void xdp_unreg_mem_model(struct xdp_mem_info *mem);

/* Drivers not supporting XDP metadata can use this helper, which
 * rejects any room expansion for metadata as a result.
 */
static __always_inline void
xdp_set_data_meta_invalid(struct xdp_buff *xdp)
{
	xdp->data_meta = xdp->data + 1;
}

static __always_inline bool
xdp_data_meta_unsupported(const struct xdp_buff *xdp)
{
	return unlikely(xdp->data_meta > xdp->data);
}

static inline bool xdp_metalen_invalid(unsigned long metalen)
{
	return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
}
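
/* Per the check above, a metadata area is only valid when its length
 * is a multiple of 4 bytes and at most 32 bytes.  So, for example, a
 * BPF program calling (sketch; struct my_meta_hdr is an illustrative
 * assumption):
 *
 *	bpf_xdp_adjust_meta(xdp, -(int)sizeof(struct my_meta_hdr));
 *
 * only succeeds when sizeof(struct my_meta_hdr) is 4, 8, ... up to
 * 32 bytes, and enough headroom is available in front of data.
 */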

struct xdp_attachment_info {
	struct bpf_prog *prog;
	u32 flags;
};

struct netdev_bpf;
void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf);

#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE

#define XDP_METADATA_KFUNC_xxx	\
	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
			   bpf_xdp_metadata_rx_timestamp) \
	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
			   bpf_xdp_metadata_rx_hash) \

enum {
#define XDP_METADATA_KFUNC(name, _) name,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
MAX_XDP_METADATA_KFUNC,
};
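
/* After preprocessing, the X-macro above expands the enum to:
 *
 *	enum {
 *		XDP_METADATA_KFUNC_RX_TIMESTAMP,
 *		XDP_METADATA_KFUNC_RX_HASH,
 *		MAX_XDP_METADATA_KFUNC,
 *	};
 *
 * so each metadata kfunc gets a stable index, and MAX_XDP_METADATA_KFUNC
 * counts them.
 */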

#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
#else
static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
#endif

#endif /* __LINUX_NET_XDP_H__ */