// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

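/* Add/remove an XDP socket to/from the list of sockets sharing this umem.
 * RCU list primitives are used so readers can walk xsk_list without taking
 * xsk_list_lock; writers serialize on the lock.
 */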
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_del_rcu(&xs->list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

/* The umem is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
			       u16 queue_id)
{
	if (queue_id >= max_t(unsigned int,
			      dev->real_num_rx_queues,
			      dev->real_num_tx_queues))
		return -EINVAL;

	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].umem = umem;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].umem = umem;

	return 0;
}

struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
				       u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		return dev->_rx[queue_id].umem;
	if (queue_id < dev->real_num_tx_queues)
		return dev->_tx[queue_id].umem;

	return NULL;
}

static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].umem = NULL;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].umem = NULL;
}

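/* Bind the umem to one queue of a netdev. Zero-copy is attempted unless
 * XDP_COPY is set; if the driver does not provide the required ndo_bpf and
 * ndo_xsk_async_xmit callbacks (or rejects the umem) and XDP_ZEROCOPY was
 * not explicitly requested, we fall back to copy mode.
 */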
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	rtnl_lock();
	if (xdp_get_umem_from_qid(dev, queue_id)) {
		err = -EBUSY;
		goto out_rtnl_unlock;
	}

	err = xdp_reg_umem_at_qid(dev, umem, queue_id);
	if (err)
		goto out_rtnl_unlock;

	umem->dev = dev;
	umem->queue_id = queue_id;
	if (force_copy)
		/* For copy-mode, we are done. */
		goto out_rtnl_unlock;

	if (!dev->netdev_ops->ndo_bpf ||
	    !dev->netdev_ops->ndo_xsk_async_xmit) {
		err = -EOPNOTSUPP;
		goto err_unreg_umem;
	}

	bpf.command = XDP_SETUP_XSK_UMEM;
	bpf.xsk.umem = umem;
	bpf.xsk.queue_id = queue_id;

	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
	if (err)
		goto err_unreg_umem;
	rtnl_unlock();

	dev_hold(dev);
	umem->zc = true;
	return 0;

err_unreg_umem:
	xdp_clear_umem_at_qid(dev, queue_id);
	if (!force_zc)
		err = 0; /* fallback to copy mode */
out_rtnl_unlock:
	rtnl_unlock();
	return err;
}

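/* Undo xdp_umem_assign_dev(): in zero-copy mode, ask the driver to drop the
 * umem and release the device reference; in all cases, clear the umem
 * pointers stored in the queue structs.
 */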
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->zc) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");
	}

	if (umem->dev) {
		rtnl_lock();
		xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
		rtnl_unlock();
	}

	if (umem->zc) {
		dev_put(umem->dev);
		umem->zc = false;
	}
}

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

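/* Final teardown once the last reference has been dropped: detach from the
 * device, destroy the fill and completion queues, unpin the user pages and
 * return them to the owner's RLIMIT_MEMLOCK accounting.
 */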
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xsk_reuseq_destroy(umem);

	xdp_umem_unpin_pages(umem);

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

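/* Reference counting for the umem. The release path takes rtnl_lock and may
 * otherwise sleep, so the final put defers the actual teardown to a
 * workqueue instead of running it in the caller's context.
 */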
void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

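/* Pin the user memory with get_user_pages() so it stays resident for the
 * lifetime of the umem. A short pin releases whatever was pinned and is
 * reported as -ENOMEM.
 */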
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

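/* Charge the pinned pages against the owner's RLIMIT_MEMLOCK (unless the
 * caller has CAP_IPC_LOCK). locked_vm is updated with a cmpxchg loop so
 * concurrent registrations cannot race past the limit.
 */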
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

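/* Validate the registration request (chunk size, alignment, headroom) and
 * initialize the umem: account the memory against the user, pin the pages
 * and build the kernel-virtual-address lookup table for each page.
 */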
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->chunk_mask = ~((u64)chunk_size - 1);
	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_account;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

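/* A umem can only be bound to a device once both its fill queue and
 * completion queue have been created.
 */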
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}