Commit | Line | Data |
---|---|---|
fbfc504a BT |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* XSKMAP used for AF_XDP sockets | |
3 | * Copyright(c) 2018 Intel Corporation. | |
fbfc504a BT |
4 | */ |
5 | ||
6 | #include <linux/bpf.h> | |
b6459415 | 7 | #include <linux/filter.h> |
fbfc504a BT |
8 | #include <net/xdp_sock.h> |
9 | #include <linux/slab.h> | |
10 | #include <linux/sched.h> | |
c317ab71 | 11 | #include <linux/btf_ids.h> |
fbfc504a | 12 | |
d20a1676 BT |
13 | #include "xsk.h" |
14 | ||
0402acd6 | 15 | static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, |
782347b6 | 16 | struct xdp_sock __rcu **map_entry) |
0402acd6 BT |
17 | { |
18 | struct xsk_map_node *node; | |
0402acd6 | 19 | |
28e1dcde RG |
20 | node = bpf_map_kzalloc(&map->map, sizeof(*node), |
21 | GFP_ATOMIC | __GFP_NOWARN); | |
0402acd6 | 22 | if (!node) |
fcd30ae0 | 23 | return ERR_PTR(-ENOMEM); |
0402acd6 | 24 | |
bb1b25ca | 25 | bpf_map_inc(&map->map); |
b4fd0d67 | 26 | atomic_inc(&map->count); |
0402acd6 BT |
27 | |
28 | node->map = map; | |
29 | node->map_entry = map_entry; | |
30 | return node; | |
31 | } | |
32 | ||
33 | static void xsk_map_node_free(struct xsk_map_node *node) | |
34 | { | |
b4fd0d67 YS |
35 | struct xsk_map *map = node->map; |
36 | ||
bb1b25ca | 37 | bpf_map_put(&node->map->map); |
0402acd6 | 38 | kfree(node); |
b4fd0d67 | 39 | atomic_dec(&map->count); |
0402acd6 BT |
40 | } |
41 | ||
42 | static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) | |
43 | { | |
44 | spin_lock_bh(&xs->map_list_lock); | |
45 | list_add_tail(&node->node, &xs->map_list); | |
46 | spin_unlock_bh(&xs->map_list_lock); | |
47 | } | |
48 | ||
49 | static void xsk_map_sock_delete(struct xdp_sock *xs, | |
782347b6 | 50 | struct xdp_sock __rcu **map_entry) |
0402acd6 BT |
51 | { |
52 | struct xsk_map_node *n, *tmp; | |
53 | ||
54 | spin_lock_bh(&xs->map_list_lock); | |
55 | list_for_each_entry_safe(n, tmp, &xs->map_list, node) { | |
56 | if (map_entry == n->map_entry) { | |
57 | list_del(&n->node); | |
58 | xsk_map_node_free(n); | |
59 | } | |
60 | } | |
61 | spin_unlock_bh(&xs->map_list_lock); | |
62 | } | |
63 | ||
fbfc504a BT |
64 | static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) |
65 | { | |
fbfc504a | 66 | struct xsk_map *m; |
819a4f32 | 67 | int numa_node; |
e312b9e7 | 68 | u64 size; |
fbfc504a | 69 | |
fbfc504a BT |
70 | if (attr->max_entries == 0 || attr->key_size != 4 || |
71 | attr->value_size != 4 || | |
72 | attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) | |
73 | return ERR_PTR(-EINVAL); | |
74 | ||
64fe8c06 BT |
75 | numa_node = bpf_map_attr_numa_node(attr); |
76 | size = struct_size(m, xsk_map, attr->max_entries); | |
64fe8c06 | 77 | |
64fe8c06 | 78 | m = bpf_map_area_alloc(size, numa_node); |
819a4f32 | 79 | if (!m) |
fbfc504a BT |
80 | return ERR_PTR(-ENOMEM); |
81 | ||
82 | bpf_map_init_from_attr(&m->map, attr); | |
0402acd6 | 83 | spin_lock_init(&m->lock); |
fbfc504a | 84 | |
fbfc504a | 85 | return &m->map; |
fbfc504a BT |
86 | } |
87 | ||
b4fd0d67 YS |
88 | static u64 xsk_map_mem_usage(const struct bpf_map *map) |
89 | { | |
90 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
91 | ||
92 | return struct_size(m, xsk_map, map->max_entries) + | |
93 | (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node); | |
94 | } | |
95 | ||
fbfc504a BT |
96 | static void xsk_map_free(struct bpf_map *map) |
97 | { | |
98 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
fbfc504a BT |
99 | |
100 | synchronize_net(); | |
64fe8c06 | 101 | bpf_map_area_free(m); |
fbfc504a BT |
102 | } |
103 | ||
104 | static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | |
105 | { | |
106 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
107 | u32 index = key ? *(u32 *)key : U32_MAX; | |
108 | u32 *next = next_key; | |
109 | ||
110 | if (index >= m->map.max_entries) { | |
111 | *next = 0; | |
112 | return 0; | |
113 | } | |
114 | ||
115 | if (index == m->map.max_entries - 1) | |
116 | return -ENOENT; | |
117 | *next = index + 1; | |
118 | return 0; | |
119 | } | |
120 | ||
4a8f87e6 | 121 | static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) |
e65650f2 MF |
122 | { |
123 | const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2; | |
124 | struct bpf_insn *insn = insn_buf; | |
125 | ||
126 | *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); | |
127 | *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); | |
128 | *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *))); | |
129 | *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map)); | |
130 | *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp); | |
131 | *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0); | |
132 | *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); | |
133 | *insn++ = BPF_MOV64_IMM(ret, 0); | |
134 | return insn - insn_buf; | |
135 | } | |
136 | ||
782347b6 THJ |
137 | /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or |
138 | * by local_bh_disable() (from XDP calls inside NAPI). The | |
139 | * rcu_read_lock_bh_held() below makes lockdep accept both. | |
140 | */ | |
e6a4750f BT |
141 | static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) |
142 | { | |
143 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
144 | ||
145 | if (key >= map->max_entries) | |
146 | return NULL; | |
147 | ||
782347b6 | 148 | return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held()); |
e6a4750f BT |
149 | } |
150 | ||
fbfc504a | 151 | static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) |
fada7fdc | 152 | { |
fada7fdc JL |
153 | return __xsk_map_lookup_elem(map, *(u32 *)key); |
154 | } | |
155 | ||
156 | static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) | |
fbfc504a | 157 | { |
3b4a63f6 | 158 | return ERR_PTR(-EOPNOTSUPP); |
fbfc504a BT |
159 | } |
160 | ||
d7ba4cc9 JK |
161 | static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, |
162 | u64 map_flags) | |
fbfc504a BT |
163 | { |
164 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
782347b6 THJ |
165 | struct xdp_sock __rcu **map_entry; |
166 | struct xdp_sock *xs, *old_xs; | |
fbfc504a | 167 | u32 i = *(u32 *)key, fd = *(u32 *)value; |
0402acd6 | 168 | struct xsk_map_node *node; |
fbfc504a BT |
169 | struct socket *sock; |
170 | int err; | |
171 | ||
172 | if (unlikely(map_flags > BPF_EXIST)) | |
173 | return -EINVAL; | |
174 | if (unlikely(i >= m->map.max_entries)) | |
175 | return -E2BIG; | |
fbfc504a BT |
176 | |
177 | sock = sockfd_lookup(fd, &err); | |
178 | if (!sock) | |
179 | return err; | |
180 | ||
181 | if (sock->sk->sk_family != PF_XDP) { | |
182 | sockfd_put(sock); | |
183 | return -EOPNOTSUPP; | |
184 | } | |
185 | ||
186 | xs = (struct xdp_sock *)sock->sk; | |
187 | ||
0402acd6 BT |
188 | map_entry = &m->xsk_map[i]; |
189 | node = xsk_map_node_alloc(m, map_entry); | |
190 | if (IS_ERR(node)) { | |
191 | sockfd_put(sock); | |
192 | return PTR_ERR(node); | |
193 | } | |
fbfc504a | 194 | |
0402acd6 | 195 | spin_lock_bh(&m->lock); |
782347b6 | 196 | old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock)); |
0402acd6 BT |
197 | if (old_xs == xs) { |
198 | err = 0; | |
199 | goto out; | |
36cc3435 BT |
200 | } else if (old_xs && map_flags == BPF_NOEXIST) { |
201 | err = -EEXIST; | |
202 | goto out; | |
203 | } else if (!old_xs && map_flags == BPF_EXIST) { | |
204 | err = -ENOENT; | |
205 | goto out; | |
0402acd6 BT |
206 | } |
207 | xsk_map_sock_add(xs, node); | |
782347b6 | 208 | rcu_assign_pointer(*map_entry, xs); |
cee27167 | 209 | if (old_xs) |
0402acd6 BT |
210 | xsk_map_sock_delete(old_xs, map_entry); |
211 | spin_unlock_bh(&m->lock); | |
fbfc504a BT |
212 | sockfd_put(sock); |
213 | return 0; | |
0402acd6 BT |
214 | |
215 | out: | |
216 | spin_unlock_bh(&m->lock); | |
217 | sockfd_put(sock); | |
218 | xsk_map_node_free(node); | |
219 | return err; | |
fbfc504a BT |
220 | } |
221 | ||
d7ba4cc9 | 222 | static long xsk_map_delete_elem(struct bpf_map *map, void *key) |
fbfc504a BT |
223 | { |
224 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
782347b6 THJ |
225 | struct xdp_sock __rcu **map_entry; |
226 | struct xdp_sock *old_xs; | |
fbfc504a BT |
227 | int k = *(u32 *)key; |
228 | ||
229 | if (k >= map->max_entries) | |
230 | return -EINVAL; | |
231 | ||
0402acd6 BT |
232 | spin_lock_bh(&m->lock); |
233 | map_entry = &m->xsk_map[k]; | |
782347b6 | 234 | old_xs = unrcu_pointer(xchg(map_entry, NULL)); |
cee27167 | 235 | if (old_xs) |
0402acd6 BT |
236 | xsk_map_sock_delete(old_xs, map_entry); |
237 | spin_unlock_bh(&m->lock); | |
fbfc504a BT |
238 | |
239 | return 0; | |
240 | } | |
241 | ||
d7ba4cc9 | 242 | static long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) |
e6a4750f | 243 | { |
32637e33 | 244 | return __bpf_xdp_redirect_map(map, index, flags, 0, |
e624d4ed | 245 | __xsk_map_lookup_elem); |
e6a4750f BT |
246 | } |
247 | ||
0402acd6 | 248 | void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, |
782347b6 | 249 | struct xdp_sock __rcu **map_entry) |
0402acd6 BT |
250 | { |
251 | spin_lock_bh(&map->lock); | |
782347b6 THJ |
252 | if (rcu_access_pointer(*map_entry) == xs) { |
253 | rcu_assign_pointer(*map_entry, NULL); | |
0402acd6 BT |
254 | xsk_map_sock_delete(xs, map_entry); |
255 | } | |
256 | spin_unlock_bh(&map->lock); | |
257 | } | |
258 | ||
134fede4 MKL |
259 | static bool xsk_map_meta_equal(const struct bpf_map *meta0, |
260 | const struct bpf_map *meta1) | |
261 | { | |
262 | return meta0->max_entries == meta1->max_entries && | |
263 | bpf_map_meta_equal(meta0, meta1); | |
264 | } | |
265 | ||
c317ab71 | 266 | BTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map) |
fbfc504a | 267 | const struct bpf_map_ops xsk_map_ops = { |
134fede4 | 268 | .map_meta_equal = xsk_map_meta_equal, |
fbfc504a BT |
269 | .map_alloc = xsk_map_alloc, |
270 | .map_free = xsk_map_free, | |
271 | .map_get_next_key = xsk_map_get_next_key, | |
272 | .map_lookup_elem = xsk_map_lookup_elem, | |
e65650f2 | 273 | .map_gen_lookup = xsk_map_gen_lookup, |
fada7fdc | 274 | .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, |
fbfc504a BT |
275 | .map_update_elem = xsk_map_update_elem, |
276 | .map_delete_elem = xsk_map_delete_elem, | |
e8d2bec0 | 277 | .map_check_btf = map_check_no_btf, |
b4fd0d67 | 278 | .map_mem_usage = xsk_map_mem_usage, |
c317ab71 | 279 | .map_btf_id = &xsk_map_btf_ids[0], |
e6a4750f | 280 | .map_redirect = xsk_map_redirect, |
fbfc504a | 281 | }; |