kernel/bpf/xskmap.c
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/bpf.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>

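/* An XSKMAP is a flat array of AF_XDP socket pointers. XDP programs
 * redirect frames into it with bpf_redirect_map(); user space commonly
 * keys the array by rx queue id, but that is only a convention.
 */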
struct xsk_map {
        struct bpf_map map;
        struct xdp_sock **xsk_map;      /* array of max_entries socket pointers */
        struct list_head __percpu *flush_list; /* sockets queued for __xsk_map_flush() */
        spinlock_t lock; /* Synchronize map updates */
};

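/* Take an extra reference on the underlying bpf_map. Each xsk_map_node
 * created below holds one such reference, so the map stays alive for as
 * long as any socket is installed in one of its entries.
 */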
int xsk_map_inc(struct xsk_map *map)
{
        struct bpf_map *m = &map->map;

        m = bpf_map_inc(m, false);
        return PTR_ERR_OR_ZERO(m);
}

void xsk_map_put(struct xsk_map *map)
{
        bpf_map_put(&map->map);
}

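/* An xsk_map_node ties a socket to one map entry: it remembers which slot
 * points at the socket and pins the map while that binding exists.
 */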
static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
                                               struct xdp_sock **map_entry)
{
        struct xsk_map_node *node;
        int err;

        node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
        if (!node)
                return ERR_PTR(-ENOMEM);

        err = xsk_map_inc(map);
        if (err) {
                kfree(node);
                return ERR_PTR(err);
        }

        node->map = map;
        node->map_entry = map_entry;
        return node;
}

static void xsk_map_node_free(struct xsk_map_node *node)
{
        xsk_map_put(node->map);
        kfree(node);
}

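/* The nodes are chained on the socket's map_list so that the socket side
 * can find and clear every map entry that still refers to it; see
 * xsk_map_try_sock_delete() below.
 */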
static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
{
        spin_lock_bh(&xs->map_list_lock);
        list_add_tail(&node->node, &xs->map_list);
        spin_unlock_bh(&xs->map_list_lock);
}

static void xsk_map_sock_delete(struct xdp_sock *xs,
                                struct xdp_sock **map_entry)
{
        struct xsk_map_node *n, *tmp;

        spin_lock_bh(&xs->map_list_lock);
        list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
                if (map_entry == n->map_entry) {
                        list_del(&n->node);
                        xsk_map_node_free(n);
                }
        }
        spin_unlock_bh(&xs->map_list_lock);
}

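/* Allocate the map: charge the memlock limit for the entry array plus the
 * per-cpu flush lists, then allocate both and initialize each cpu's list.
 */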
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
        struct xsk_map *m;
        int cpu, err;
        u64 cost;

        if (!capable(CAP_NET_ADMIN))
                return ERR_PTR(-EPERM);

        if (attr->max_entries == 0 || attr->key_size != 4 ||
            attr->value_size != 4 ||
            attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
                return ERR_PTR(-EINVAL);

        m = kzalloc(sizeof(*m), GFP_USER);
        if (!m)
                return ERR_PTR(-ENOMEM);

        bpf_map_init_from_attr(&m->map, attr);
        spin_lock_init(&m->lock);

        cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
        cost += sizeof(struct list_head) * num_possible_cpus();

        /* Notice: returns -EPERM if map size is larger than memlock limit */
        err = bpf_map_charge_init(&m->map.memory, cost);
        if (err)
                goto free_m;

        err = -ENOMEM;

        m->flush_list = alloc_percpu(struct list_head);
        if (!m->flush_list)
                goto free_charge;

        for_each_possible_cpu(cpu)
                INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));

        m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
                                        sizeof(struct xdp_sock *),
                                        m->map.numa_node);
        if (!m->xsk_map)
                goto free_percpu;
        return &m->map;

free_percpu:
        free_percpu(m->flush_list);
free_charge:
        bpf_map_charge_finish(&m->map.memory);
free_m:
        kfree(m);
        return ERR_PTR(err);
}

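/* Free the map. The refcount has dropped to zero here, and since every
 * installed socket holds a reference through its xsk_map_node, no entries
 * can still be populated. bpf_clear_redirect_map() plus synchronize_net()
 * make sure no in-flight XDP program still references the map.
 */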
static void xsk_map_free(struct bpf_map *map)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);

        bpf_clear_redirect_map(map);
        synchronize_net();
        free_percpu(m->flush_list);
        bpf_map_area_free(m->xsk_map);
        kfree(m);
}

static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        u32 index = key ? *(u32 *)key : U32_MAX;
        u32 *next = next_key;

        if (index >= m->map.max_entries) {
                *next = 0;
                return 0;
        }

        if (index == m->map.max_entries - 1)
                return -ENOENT;
        *next = index + 1;
        return 0;
}

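/* Fast-path lookup used on the XDP redirect path. The entry is read with
 * READ_ONCE() and may be NULL if no socket is installed at that index.
 */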
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        struct xdp_sock *xs;

        if (key >= map->max_entries)
                return NULL;

        xs = READ_ONCE(m->xsk_map[key]);
        return xs;
}

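/* Queue a frame on the destination socket. The first time a socket receives
 * in a NAPI cycle it is added to the per-cpu flush list; __xsk_map_flush()
 * then completes the receive for every queued socket at the end of the cycle.
 */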
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
                       struct xdp_sock *xs)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        struct list_head *flush_list = this_cpu_ptr(m->flush_list);
        int err;

        err = xsk_rcv(xs, xdp);
        if (err)
                return err;

        if (!xs->flush_node.prev)
                list_add(&xs->flush_node, flush_list);

        return 0;
}

void __xsk_map_flush(struct bpf_map *map)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        struct list_head *flush_list = this_cpu_ptr(m->flush_list);
        struct xdp_sock *xs, *tmp;

        list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
                xsk_flush(xs);
                __list_del_clearprev(&xs->flush_node);
        }
}

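/* Lookup from a BPF program; must run inside an RCU read-side section. */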
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
        WARN_ON_ONCE(!rcu_read_lock_held());
        return __xsk_map_lookup_elem(map, *(u32 *)key);
}

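/* Lookups from the bpf() syscall are not supported: the stored value is a
 * kernel socket pointer with no meaningful representation for user space.
 */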
static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
{
        return ERR_PTR(-EOPNOTSUPP);
}

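/* Update from the bpf() syscall: the value is an AF_XDP socket fd. The
 * socket must already be set up for use in a map, and the entry is swapped
 * in under m->lock while honouring the BPF_NOEXIST/BPF_EXIST flags.
 */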
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
                               u64 map_flags)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        struct xdp_sock *xs, *old_xs, **map_entry;
        u32 i = *(u32 *)key, fd = *(u32 *)value;
        struct xsk_map_node *node;
        struct socket *sock;
        int err;

        if (unlikely(map_flags > BPF_EXIST))
                return -EINVAL;
        if (unlikely(i >= m->map.max_entries))
                return -E2BIG;

        sock = sockfd_lookup(fd, &err);
        if (!sock)
                return err;

        if (sock->sk->sk_family != PF_XDP) {
                sockfd_put(sock);
                return -EOPNOTSUPP;
        }

        xs = (struct xdp_sock *)sock->sk;

        if (!xsk_is_setup_for_bpf_map(xs)) {
                sockfd_put(sock);
                return -EOPNOTSUPP;
        }

        map_entry = &m->xsk_map[i];
        node = xsk_map_node_alloc(m, map_entry);
        if (IS_ERR(node)) {
                sockfd_put(sock);
                return PTR_ERR(node);
        }

        spin_lock_bh(&m->lock);
        old_xs = READ_ONCE(*map_entry);
        if (old_xs == xs) {
                err = 0;
                goto out;
        } else if (old_xs && map_flags == BPF_NOEXIST) {
                err = -EEXIST;
                goto out;
        } else if (!old_xs && map_flags == BPF_EXIST) {
                err = -ENOENT;
                goto out;
        }
        xsk_map_sock_add(xs, node);
        WRITE_ONCE(*map_entry, xs);
        if (old_xs)
                xsk_map_sock_delete(old_xs, map_entry);
        spin_unlock_bh(&m->lock);
        sockfd_put(sock);
        return 0;

out:
        spin_unlock_bh(&m->lock);
        sockfd_put(sock);
        xsk_map_node_free(node);
        return err;
}

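/* Delete from the bpf() syscall: clear the entry with xchg() under the map
 * lock and unlink the old socket's node, dropping its map reference.
 */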
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
        struct xsk_map *m = container_of(map, struct xsk_map, map);
        struct xdp_sock *old_xs, **map_entry;
        int k = *(u32 *)key;

        if (k >= map->max_entries)
                return -EINVAL;

        spin_lock_bh(&m->lock);
        map_entry = &m->xsk_map[k];
        old_xs = xchg(map_entry, NULL);
        if (old_xs)
                xsk_map_sock_delete(old_xs, map_entry);
        spin_unlock_bh(&m->lock);

        return 0;
}

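/* Called from the socket side on teardown: clear the entry only if it still
 * points at this socket, so a concurrent update of the same slot is not lost.
 */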
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
                             struct xdp_sock **map_entry)
{
        spin_lock_bh(&map->lock);
        if (READ_ONCE(*map_entry) == xs) {
                WRITE_ONCE(*map_entry, NULL);
                xsk_map_sock_delete(xs, map_entry);
        }
        spin_unlock_bh(&map->lock);
}

const struct bpf_map_ops xsk_map_ops = {
        .map_alloc = xsk_map_alloc,
        .map_free = xsk_map_free,
        .map_get_next_key = xsk_map_get_next_key,
        .map_lookup_elem = xsk_map_lookup_elem,
        .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
        .map_update_elem = xsk_map_update_elem,
        .map_delete_elem = xsk_map_delete_elem,
        .map_check_btf = map_check_no_btf,
};