Commit | Line | Data |
---|---|---|
c0c77d8f BT |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* XDP user-space packet buffer | |
3 | * Copyright(c) 2018 Intel Corporation. | |
c0c77d8f BT |
4 | */ |
5 | ||
6 | #include <linux/init.h> | |
7 | #include <linux/sched/mm.h> | |
8 | #include <linux/sched/signal.h> | |
9 | #include <linux/sched/task.h> | |
10 | #include <linux/uaccess.h> | |
11 | #include <linux/slab.h> | |
12 | #include <linux/bpf.h> | |
13 | #include <linux/mm.h> | |
84c6b868 JK |
14 | #include <linux/netdevice.h> |
15 | #include <linux/rtnetlink.h> | |
50e74c01 | 16 | #include <linux/idr.h> |
624676e7 | 17 | #include <linux/vmalloc.h> |
c0c77d8f BT |
18 | |
19 | #include "xdp_umem.h" | |
e61e62b9 | 20 | #include "xsk_queue.h" |
c0c77d8f | 21 | |
50e74c01 BT |
22 | static DEFINE_IDA(umem_ida); |
23 | ||
/* Release the pinned user pages backing @umem and free the page array.
 *
 * Pages are marked dirty on unpin (third arg true) because the umem is
 * mapped writable (pinned with FOLL_WRITE in xdp_umem_pin_pages()).
 * umem->pgs is reset to NULL so a later unpin/free cannot double-free.
 */
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}
84c6b868 | 31 | |
1c1efc2a | 32 | static void xdp_umem_unaccount_pages(struct xdp_umem *umem) |
c9b47cc1 | 33 | { |
1c1efc2a MK |
34 | if (umem->user) { |
35 | atomic_long_sub(umem->npgs, &umem->user->locked_vm); | |
36 | free_uid(umem->user); | |
37 | } | |
84c6b868 JK |
38 | } |
39 | ||
7f7ffa4e MK |
40 | static void xdp_umem_addr_unmap(struct xdp_umem *umem) |
41 | { | |
42 | vunmap(umem->addrs); | |
43 | umem->addrs = NULL; | |
44 | } | |
45 | ||
46 | static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, | |
47 | u32 nr_pages) | |
48 | { | |
49 | umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); | |
50 | if (!umem->addrs) | |
51 | return -ENOMEM; | |
52 | return 0; | |
53 | } | |
54 | ||
/* Final teardown of a umem once its last reference is gone.
 *
 * Order matters: the kernel mapping must be removed before the pages
 * are unpinned, and memlock accounting is only released after the pages
 * themselves are. The ida id is returned to the pool so it can be
 * reused by new umems.
 */
static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}
66 | ||
/* Workqueue trampoline: recover the umem from its embedded work_struct
 * and release it. Used when the final put happens in a context that
 * must not perform the (sleeping) teardown directly.
 */
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}
73 | ||
/* Take an additional reference on @umem. Paired with xdp_put_umem(). */
void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}
78 | ||
537cf4e3 | 79 | void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) |
c0c77d8f BT |
80 | { |
81 | if (!umem) | |
82 | return; | |
83 | ||
537cf4e3 MK |
84 | if (refcount_dec_and_test(&umem->users)) { |
85 | if (defer_cleanup) { | |
86 | INIT_WORK(&umem->work, xdp_umem_release_deferred); | |
87 | schedule_work(&umem->work); | |
88 | } else { | |
89 | xdp_umem_release(umem); | |
90 | } | |
91 | } | |
c0c77d8f BT |
92 | } |
93 | ||
/* Pin umem->npgs user pages starting at @address into umem->pgs.
 *
 * Pages are pinned writable and FOLL_LONGTERM since they back the umem
 * for its whole lifetime. Returns 0 on success or a negative errno.
 *
 * On a short pin (0 <= npgs < requested) the pages that were pinned are
 * unpinned again and -ENOMEM is returned; on a pin_user_pages() error
 * its errno is propagated. Either way umem->pgs ends up NULL, so the
 * caller need not clean up. (kvfree(NULL) on the out_pgs path after
 * xdp_umem_unpin_pages() is a harmless no-op.)
 */
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	/* __GFP_NOWARN: npgs is user-controlled, allocation failure is
	 * reported via -ENOMEM rather than a console splat.
	 */
	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	/* pin_user_pages() walks current->mm; hold mmap read lock. */
	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			/* Partial pin: shrink npgs so unpin releases
			 * exactly what was pinned.
			 */
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}
127 | ||
/* Charge umem->npgs pages against the caller's RLIMIT_MEMLOCK.
 *
 * Callers with CAP_IPC_LOCK are exempt and umem->user stays NULL, which
 * makes xdp_umem_unaccount_pages() a no-op for them. Otherwise a
 * reference to the current user is taken and locked_vm is raised with a
 * lock-free cmpxchg retry loop (other tasks of the same user may be
 * updating locked_vm concurrently).
 *
 * Returns 0 on success or -ENOBUFS if the charge would exceed the
 * limit; on failure the uid reference is dropped and umem->user reset.
 */
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
		/* Retry if locked_vm moved under us since the read. */
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}
150 | ||
/* Validate a user registration request and initialize @umem: check the
 * geometry in @mr, account the pages against RLIMIT_MEMLOCK, pin them,
 * and vmap them into the kernel.
 *
 * Returns 0 on success or a negative errno; on failure everything done
 * so far is unwound, so the caller only needs to free @umem itself.
 */
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u64 npgs, addr = mr->addr, size = mr->len;
	unsigned int chunks, chunks_rem;
	int err;

	/* A chunk (frame) must fit in one page so each frame is
	 * physically contiguous.
	 */
	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or*
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	/* Reject any flag bits this kernel does not know about. */
	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	/* Aligned mode relies on power-of-2 chunk sizes for masking. */
	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	/* Guard against u64 wraparound of the region's end address. */
	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	/* umem->npgs is u32; refuse regions too large to count. */
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem);
	if (chunks == 0)
		return -EINVAL;

	/* Aligned mode requires the area to be a whole number of chunks. */
	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	/* Headroom must leave room for XDP_PACKET_HEADROOM plus data. */
	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = (u32)npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}
965a9909 | 233 | |
a49049ea BT |
234 | struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) |
235 | { | |
236 | struct xdp_umem *umem; | |
237 | int err; | |
238 | ||
239 | umem = kzalloc(sizeof(*umem), GFP_KERNEL); | |
240 | if (!umem) | |
241 | return ERR_PTR(-ENOMEM); | |
242 | ||
21f1481a | 243 | err = ida_alloc(&umem_ida, GFP_KERNEL); |
50e74c01 BT |
244 | if (err < 0) { |
245 | kfree(umem); | |
246 | return ERR_PTR(err); | |
247 | } | |
248 | umem->id = err; | |
249 | ||
a49049ea BT |
250 | err = xdp_umem_reg(umem, mr); |
251 | if (err) { | |
21f1481a | 252 | ida_free(&umem_ida, umem->id); |
a49049ea BT |
253 | kfree(umem); |
254 | return ERR_PTR(err); | |
255 | } | |
256 | ||
257 | return umem; | |
258 | } |