Commit | Line | Data |
---|---|---|
c0c77d8f BT |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* XDP user-space packet buffer | |
3 | * Copyright(c) 2018 Intel Corporation. | |
c0c77d8f BT |
4 | */ |
5 | ||
6 | #include <linux/init.h> | |
7 | #include <linux/sched/mm.h> | |
8 | #include <linux/sched/signal.h> | |
9 | #include <linux/sched/task.h> | |
10 | #include <linux/uaccess.h> | |
11 | #include <linux/slab.h> | |
12 | #include <linux/bpf.h> | |
13 | #include <linux/mm.h> | |
84c6b868 JK |
14 | #include <linux/netdevice.h> |
15 | #include <linux/rtnetlink.h> | |
50e74c01 | 16 | #include <linux/idr.h> |
624676e7 | 17 | #include <linux/vmalloc.h> |
c0c77d8f BT |
18 | |
19 | #include "xdp_umem.h" | |
e61e62b9 | 20 | #include "xsk_queue.h" |
c0c77d8f | 21 | |
50e74c01 BT |
/* Allocator for umem->id: an ID is taken in xdp_umem_create() and handed
 * back with ida_free() when registration fails or the umem is released.
 */
static DEFINE_IDA(umem_ida);
23 | ||
1c1efc2a | 24 | static void xdp_umem_unpin_pages(struct xdp_umem *umem) |
c9b47cc1 | 25 | { |
1c1efc2a | 26 | unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); |
84c6b868 | 27 | |
a720a2a0 | 28 | kvfree(umem->pgs); |
1c1efc2a | 29 | umem->pgs = NULL; |
c9b47cc1 | 30 | } |
84c6b868 | 31 | |
1c1efc2a | 32 | static void xdp_umem_unaccount_pages(struct xdp_umem *umem) |
c9b47cc1 | 33 | { |
1c1efc2a MK |
34 | if (umem->user) { |
35 | atomic_long_sub(umem->npgs, &umem->user->locked_vm); | |
36 | free_uid(umem->user); | |
37 | } | |
84c6b868 JK |
38 | } |
39 | ||
7f7ffa4e MK |
40 | static void xdp_umem_addr_unmap(struct xdp_umem *umem) |
41 | { | |
42 | vunmap(umem->addrs); | |
43 | umem->addrs = NULL; | |
44 | } | |
45 | ||
46 | static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, | |
47 | u32 nr_pages) | |
48 | { | |
49 | umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); | |
50 | if (!umem->addrs) | |
51 | return -ENOMEM; | |
52 | return 0; | |
53 | } | |
54 | ||
c0c77d8f BT |
55 | static void xdp_umem_release(struct xdp_umem *umem) |
56 | { | |
c2d3d6a4 | 57 | umem->zc = false; |
21f1481a | 58 | ida_free(&umem_ida, umem->id); |
50e74c01 | 59 | |
7f7ffa4e | 60 | xdp_umem_addr_unmap(umem); |
a49049ea | 61 | xdp_umem_unpin_pages(umem); |
c0c77d8f | 62 | |
c0c77d8f | 63 | xdp_umem_unaccount_pages(umem); |
c0c77d8f BT |
64 | kfree(umem); |
65 | } | |
66 | ||
537cf4e3 MK |
67 | static void xdp_umem_release_deferred(struct work_struct *work) |
68 | { | |
69 | struct xdp_umem *umem = container_of(work, struct xdp_umem, work); | |
70 | ||
71 | xdp_umem_release(umem); | |
72 | } | |
73 | ||
c0c77d8f BT |
74 | void xdp_get_umem(struct xdp_umem *umem) |
75 | { | |
d3b42f14 | 76 | refcount_inc(&umem->users); |
c0c77d8f BT |
77 | } |
78 | ||
537cf4e3 | 79 | void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) |
c0c77d8f BT |
80 | { |
81 | if (!umem) | |
82 | return; | |
83 | ||
537cf4e3 MK |
84 | if (refcount_dec_and_test(&umem->users)) { |
85 | if (defer_cleanup) { | |
86 | INIT_WORK(&umem->work, xdp_umem_release_deferred); | |
87 | schedule_work(&umem->work); | |
88 | } else { | |
89 | xdp_umem_release(umem); | |
90 | } | |
91 | } | |
c0c77d8f BT |
92 | } |
93 | ||
07bf2d97 | 94 | static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) |
c0c77d8f BT |
95 | { |
96 | unsigned int gup_flags = FOLL_WRITE; | |
97 | long npgs; | |
98 | int err; | |
99 | ||
a720a2a0 | 100 | umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN); |
c0c77d8f BT |
101 | if (!umem->pgs) |
102 | return -ENOMEM; | |
103 | ||
d8ed45c5 | 104 | mmap_read_lock(current->mm); |
07bf2d97 | 105 | npgs = pin_user_pages(address, umem->npgs, |
4c630f30 | 106 | gup_flags | FOLL_LONGTERM, &umem->pgs[0]); |
d8ed45c5 | 107 | mmap_read_unlock(current->mm); |
c0c77d8f BT |
108 | |
109 | if (npgs != umem->npgs) { | |
110 | if (npgs >= 0) { | |
111 | umem->npgs = npgs; | |
112 | err = -ENOMEM; | |
113 | goto out_pin; | |
114 | } | |
115 | err = npgs; | |
116 | goto out_pgs; | |
117 | } | |
118 | return 0; | |
119 | ||
120 | out_pin: | |
121 | xdp_umem_unpin_pages(umem); | |
122 | out_pgs: | |
a720a2a0 | 123 | kvfree(umem->pgs); |
c0c77d8f BT |
124 | umem->pgs = NULL; |
125 | return err; | |
126 | } | |
127 | ||
128 | static int xdp_umem_account_pages(struct xdp_umem *umem) | |
129 | { | |
130 | unsigned long lock_limit, new_npgs, old_npgs; | |
131 | ||
132 | if (capable(CAP_IPC_LOCK)) | |
133 | return 0; | |
134 | ||
135 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | |
136 | umem->user = get_uid(current_user()); | |
137 | ||
138 | do { | |
139 | old_npgs = atomic_long_read(&umem->user->locked_vm); | |
140 | new_npgs = old_npgs + umem->npgs; | |
141 | if (new_npgs > lock_limit) { | |
142 | free_uid(umem->user); | |
143 | umem->user = NULL; | |
144 | return -ENOBUFS; | |
145 | } | |
146 | } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, | |
147 | new_npgs) != old_npgs); | |
148 | return 0; | |
149 | } | |
150 | ||
a49049ea | 151 | static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) |
c0c77d8f | 152 | { |
c05cd364 | 153 | bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; |
c7df4813 KC |
154 | u32 chunk_size = mr->chunk_size, headroom = mr->headroom; |
155 | u64 addr = mr->addr, size = mr->len; | |
156 | u32 chunks_rem, npgs_rem; | |
157 | u64 chunks, npgs; | |
99e3a236 | 158 | int err; |
c0c77d8f | 159 | |
bbff2f32 | 160 | if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { |
c0c77d8f BT |
161 | /* Strictly speaking we could support this, if: |
162 | * - huge pages, or* | |
163 | * - using an IOMMU, or | |
164 | * - making sure the memory area is consecutive | |
165 | * but for now, we simply say "computer says no". | |
166 | */ | |
167 | return -EINVAL; | |
168 | } | |
169 | ||
c2d3d6a4 | 170 | if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) |
c05cd364 KL |
171 | return -EINVAL; |
172 | ||
173 | if (!unaligned_chunks && !is_power_of_2(chunk_size)) | |
c0c77d8f BT |
174 | return -EINVAL; |
175 | ||
176 | if (!PAGE_ALIGNED(addr)) { | |
177 | /* Memory area has to be page size aligned. For | |
178 | * simplicity, this might change. | |
179 | */ | |
180 | return -EINVAL; | |
181 | } | |
182 | ||
183 | if ((addr + size) < addr) | |
184 | return -EINVAL; | |
185 | ||
2b1667e5 BT |
186 | npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); |
187 | if (npgs_rem) | |
188 | npgs++; | |
b16a87d0 BT |
189 | if (npgs > U32_MAX) |
190 | return -EINVAL; | |
191 | ||
c7df4813 KC |
192 | chunks = div_u64_rem(size, chunk_size, &chunks_rem); |
193 | if (!chunks || chunks > U32_MAX) | |
c0c77d8f BT |
194 | return -EINVAL; |
195 | ||
2b1667e5 BT |
196 | if (!unaligned_chunks && chunks_rem) |
197 | return -EINVAL; | |
c0c77d8f | 198 | |
99e3a236 | 199 | if (headroom >= chunk_size - XDP_PACKET_HEADROOM) |
c0c77d8f BT |
200 | return -EINVAL; |
201 | ||
93ee30f3 | 202 | umem->size = size; |
bbff2f32 | 203 | umem->headroom = headroom; |
2b43470a | 204 | umem->chunk_size = chunk_size; |
1c1efc2a | 205 | umem->chunks = chunks; |
c7df4813 | 206 | umem->npgs = npgs; |
c0c77d8f BT |
207 | umem->pgs = NULL; |
208 | umem->user = NULL; | |
c05cd364 | 209 | umem->flags = mr->flags; |
c0c77d8f | 210 | |
921b6869 | 211 | INIT_LIST_HEAD(&umem->xsk_dma_list); |
d3b42f14 | 212 | refcount_set(&umem->users, 1); |
c0c77d8f BT |
213 | |
214 | err = xdp_umem_account_pages(umem); | |
215 | if (err) | |
044175a0 | 216 | return err; |
c0c77d8f | 217 | |
07bf2d97 | 218 | err = xdp_umem_pin_pages(umem, (unsigned long)addr); |
c0c77d8f BT |
219 | if (err) |
220 | goto out_account; | |
8aef7340 | 221 | |
7f7ffa4e MK |
222 | err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); |
223 | if (err) | |
224 | goto out_unpin; | |
225 | ||
2b43470a | 226 | return 0; |
c0c77d8f | 227 | |
7f7ffa4e MK |
228 | out_unpin: |
229 | xdp_umem_unpin_pages(umem); | |
c0c77d8f BT |
230 | out_account: |
231 | xdp_umem_unaccount_pages(umem); | |
c0c77d8f BT |
232 | return err; |
233 | } | |
965a9909 | 234 | |
a49049ea BT |
235 | struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) |
236 | { | |
237 | struct xdp_umem *umem; | |
238 | int err; | |
239 | ||
240 | umem = kzalloc(sizeof(*umem), GFP_KERNEL); | |
241 | if (!umem) | |
242 | return ERR_PTR(-ENOMEM); | |
243 | ||
21f1481a | 244 | err = ida_alloc(&umem_ida, GFP_KERNEL); |
50e74c01 BT |
245 | if (err < 0) { |
246 | kfree(umem); | |
247 | return ERR_PTR(err); | |
248 | } | |
249 | umem->id = err; | |
250 | ||
a49049ea BT |
251 | err = xdp_umem_reg(umem, mr); |
252 | if (err) { | |
21f1481a | 253 | ida_free(&umem_ida, umem->id); |
a49049ea BT |
254 | kfree(umem); |
255 | return ERR_PTR(err); | |
256 | } | |
257 | ||
258 | return umem; | |
259 | } |