// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>
#include <linux/rbtree_latch.h>

/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
};
const struct bpf_prog_ops bpf_extension_prog_ops = {
};

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
static struct latch_tree_root image_tree __cacheline_aligned;

/* serializes access to trampoline_table and image_tree */
static DEFINE_MUTEX(trampoline_mutex);

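/* Allocate one executable page for a trampoline image. The page stays
 * writable so that attaching or detaching programs does not require
 * flipping the protection bits on every update.
 */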
static void *bpf_jit_alloc_exec_page(void)
{
	void *image;

	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return NULL;

	set_vm_flush_reset_perms(image);
	/* Keep image as writeable. The alternative is to keep flipping ro/rw
	 * every time a new program is attached or detached.
	 */
	set_memory_x((long)image, 1);
	return image;
}

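/* Trampoline images are kept in a latch tree keyed by page address, so that
 * address lookups can run locklessly under RCU.
 */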
static __always_inline bool image_tree_less(struct latch_tree_node *a,
					    struct latch_tree_node *b)
{
	struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
	struct bpf_image *ib = container_of(b, struct bpf_image, tnode);

	return ia < ib;
}

static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
{
	void *image = container_of(n, struct bpf_image, tnode);

	if (addr < image)
		return -1;
	if (addr >= image + PAGE_SIZE)
		return 1;

	return 0;
}

static const struct latch_tree_ops image_tree_ops = {
	.less = image_tree_less,
	.comp = image_tree_comp,
};

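/* Allocate an image page and insert it into the image tree. Callers that
 * already hold trampoline_mutex pass lock == false to avoid deadlocking
 * on the mutex.
 */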
static void *__bpf_image_alloc(bool lock)
{
	struct bpf_image *image;

	image = bpf_jit_alloc_exec_page();
	if (!image)
		return NULL;

	if (lock)
		mutex_lock(&trampoline_mutex);
	latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
	if (lock)
		mutex_unlock(&trampoline_mutex);
	return image->data;
}

void *bpf_image_alloc(void)
{
	return __bpf_image_alloc(true);
}

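/* Return true if @addr falls within a trampoline image page, so the rest
 * of the kernel (e.g. the stack unwinder) can recognize trampoline pages
 * as valid text addresses.
 */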
bool is_bpf_image_address(unsigned long addr)
{
	bool ret;

	rcu_read_lock();
	ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
	rcu_read_unlock();

	return ret;
}

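/* Look up the trampoline for a given attach target key, creating and
 * hashing a new one on first use. Takes a reference that the caller must
 * drop with bpf_trampoline_put().
 */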
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	void *image;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	image = __bpf_image_alloc(false);
	if (!image) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
	tr->image = image;
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

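/* Returns 1 if @ip is a location managed by ftrace, 0 if it is not, and
 * -EFAULT if ftrace reports a nearby but different address, which the
 * patching code below cannot handle.
 */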
static int is_ftrace_location(void *ip)
{
	long addr;

	addr = ftrace_location((long)ip);
	if (!addr)
		return 0;
	if (WARN_ON_ONCE(addr != (long)ip))
		return -EFAULT;
	return 1;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	ret = is_ftrace_location(ip);
	if (ret < 0)
		return ret;
	tr->func.ftrace_managed = ret;

	if (tr->func.ftrace_managed)
		ret = register_ftrace_direct((long)ip, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	return ret;
}

/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
 * bytes on x86. Pick a number of programs whose trampoline code still fits
 * into BPF_IMAGE_SIZE / 2.
 */
#define BPF_MAX_TRAMP_PROGS 40

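/* Regenerate the trampoline code for the current set of attached programs.
 * The image page is split in two halves; tr->selector picks which half the
 * live trampoline occupies, so new code is always built in the idle half
 * before the call site is repointed at it.
 */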
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
	void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
	void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
	struct bpf_prog **progs, **fentry, **fexit;
	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
	struct bpf_prog_aux *aux;
	int err;

	if (fentry_cnt + fexit_cnt == 0) {
		err = unregister_fentry(tr, old_image);
		tr->selector = 0;
		goto out;
	}

	/* populate fentry progs */
	fentry = progs = progs_to_run;
	hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
		*progs++ = aux->prog;

	/* populate fexit progs */
	fexit = progs;
	hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
		*progs++ = aux->prog;

	if (fexit_cnt)
		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

	/* Though the second half of trampoline page is unused a task could be
	 * preempted in the middle of the first half of trampoline and two
	 * updates to trampoline would change the code from underneath the
	 * preempted task. Hence wait for tasks to voluntarily schedule or go
	 * to userspace.
	 */
	synchronize_rcu_tasks();

	err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
					  &tr->func.model, flags,
					  fentry, fentry_cnt,
					  fexit, fexit_cnt,
					  tr->func.addr);
	if (err < 0)
		goto out;

	if (tr->selector)
		/* progs already running at this address */
		err = modify_fentry(tr, old_image, new_image);
	else
		/* first time registering */
		err = register_fentry(tr, new_image);
	if (err)
		goto out;
	tr->selector++;
out:
	return err;
}

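/* Map a uapi attach type onto the internal trampoline slot. Anything other
 * than fentry/fexit is treated as an extension (freplace) program.
 */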
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
{
	switch (t) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	case BPF_TRACE_FEXIT:
		return BPF_TRAMP_FEXIT;
	default:
		return BPF_TRAMP_REPLACE;
	}
}

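/* Attach @prog to its trampoline. Extension programs are exclusive: they
 * cannot coexist with fentry/fexit programs on the same target, and only
 * one extension can replace a given function at a time.
 */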
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err = 0;
	int cnt;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
	mutex_lock(&tr->mutex);
	if (tr->extension_prog) {
		/* cannot attach fentry/fexit if extension prog is attached.
		 * cannot overwrite extension prog either.
		 */
		err = -EBUSY;
		goto out;
	}
	cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT];
	if (kind == BPF_TRAMP_REPLACE) {
		/* Cannot attach extension if fentry/fexit are in use. */
		if (cnt) {
			err = -EBUSY;
			goto out;
		}
		tr->extension_prog = prog;
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
					 prog->bpf_func);
		goto out;
	}
	if (cnt >= BPF_MAX_TRAMP_PROGS) {
		err = -E2BIG;
		goto out;
	}
	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
		/* prog already linked */
		err = -EBUSY;
		goto out;
	}
	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(prog->aux->trampoline);
	if (err) {
		hlist_del(&prog->aux->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
out:
	mutex_unlock(&tr->mutex);
	return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
	mutex_lock(&tr->mutex);
	if (kind == BPF_TRAMP_REPLACE) {
		WARN_ON_ONCE(!tr->extension_prog);
		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
					 tr->extension_prog->bpf_func, NULL);
		tr->extension_prog = NULL;
		goto out;
	}
	hlist_del(&prog->aux->tramp_hlist);
	tr->progs_cnt[kind]--;
	err = bpf_trampoline_update(prog->aux->trampoline);
out:
	mutex_unlock(&tr->mutex);
	return err;
}

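/* Drop a reference to the trampoline. The last put unhashes it, removes
 * its image from the image tree and frees the page, but only after
 * synchronize_rcu_tasks() guarantees no task is still executing in it.
 */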
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	struct bpf_image *image;

	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
		goto out;
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
		goto out;
	image = container_of(tr->image, struct bpf_image, data);
	latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
	/* wait for tasks to get out of trampoline before freeing it */
	synchronize_rcu_tasks();
	bpf_jit_free_exec(image);
	hlist_del(&tr->hlist);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
 * are needed for trampoline. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
{
	u64 start = 0;

	rcu_read_lock();
	preempt_disable();
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		start = sched_clock();
	return start;
}

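/* Pairs with __bpf_prog_enter(): folds the elapsed time into the per-cpu
 * stats when BPF stats are enabled, then re-enables preemption and exits
 * the RCU read-side critical section.
 */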
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter
	     * and disabled in __bpf_prog_exit.
	     * And vice versa.
	     * Hence check that 'start' is not zero.
	     */
	    start) {
		stats = this_cpu_ptr(prog->aux->stats);
		u64_stats_update_begin(&stats->syncp);
		stats->cnt++;
		stats->nsecs += sched_clock() - start;
		u64_stats_update_end(&stats->syncp);
	}
	preempt_enable();
	rcu_read_unlock();
}

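/* Weak default for architectures without trampoline support; the arch JIT
 * (e.g. x86-64) is expected to provide the real implementation.
 */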
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_prog **fentry_progs, int fentry_cnt,
			    struct bpf_prog **fexit_progs, int fexit_cnt,
			    void *orig_call)
{
	return -ENOTSUPP;
}

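/* Initialize the trampoline hash table buckets at boot. */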
static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);