Commit | Line | Data |
---|---|---|
fbfc504a BT |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* XSKMAP used for AF_XDP sockets | |
3 | * Copyright(c) 2018 Intel Corporation. | |
fbfc504a BT |
4 | */ |
5 | ||
6 | #include <linux/bpf.h> | |
7 | #include <linux/capability.h> | |
8 | #include <net/xdp_sock.h> | |
9 | #include <linux/slab.h> | |
10 | #include <linux/sched.h> | |
11 | ||
12 | struct xsk_map { | |
13 | struct bpf_map map; | |
14 | struct xdp_sock **xsk_map; | |
15 | struct list_head __percpu *flush_list; | |
16 | }; | |
17 | ||
18 | static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) | |
19 | { | |
20 | int cpu, err = -EINVAL; | |
21 | struct xsk_map *m; | |
22 | u64 cost; | |
23 | ||
24 | if (!capable(CAP_NET_ADMIN)) | |
25 | return ERR_PTR(-EPERM); | |
26 | ||
27 | if (attr->max_entries == 0 || attr->key_size != 4 || | |
28 | attr->value_size != 4 || | |
29 | attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) | |
30 | return ERR_PTR(-EINVAL); | |
31 | ||
32 | m = kzalloc(sizeof(*m), GFP_USER); | |
33 | if (!m) | |
34 | return ERR_PTR(-ENOMEM); | |
35 | ||
36 | bpf_map_init_from_attr(&m->map, attr); | |
37 | ||
38 | cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *); | |
39 | cost += sizeof(struct list_head) * num_possible_cpus(); | |
40 | if (cost >= U32_MAX - PAGE_SIZE) | |
41 | goto free_m; | |
42 | ||
43 | m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; | |
44 | ||
45 | /* Notice returns -EPERM on if map size is larger than memlock limit */ | |
46 | err = bpf_map_precharge_memlock(m->map.pages); | |
47 | if (err) | |
48 | goto free_m; | |
49 | ||
e94fa1d9 DB |
50 | err = -ENOMEM; |
51 | ||
fbfc504a BT |
52 | m->flush_list = alloc_percpu(struct list_head); |
53 | if (!m->flush_list) | |
54 | goto free_m; | |
55 | ||
56 | for_each_possible_cpu(cpu) | |
57 | INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); | |
58 | ||
59 | m->xsk_map = bpf_map_area_alloc(m->map.max_entries * | |
60 | sizeof(struct xdp_sock *), | |
61 | m->map.numa_node); | |
62 | if (!m->xsk_map) | |
63 | goto free_percpu; | |
64 | return &m->map; | |
65 | ||
66 | free_percpu: | |
67 | free_percpu(m->flush_list); | |
68 | free_m: | |
69 | kfree(m); | |
70 | return ERR_PTR(err); | |
71 | } | |
72 | ||
73 | static void xsk_map_free(struct bpf_map *map) | |
74 | { | |
75 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
76 | int i; | |
77 | ||
f6069b9a | 78 | bpf_clear_redirect_map(map); |
fbfc504a BT |
79 | synchronize_net(); |
80 | ||
81 | for (i = 0; i < map->max_entries; i++) { | |
82 | struct xdp_sock *xs; | |
83 | ||
84 | xs = m->xsk_map[i]; | |
85 | if (!xs) | |
86 | continue; | |
87 | ||
88 | sock_put((struct sock *)xs); | |
89 | } | |
90 | ||
91 | free_percpu(m->flush_list); | |
92 | bpf_map_area_free(m->xsk_map); | |
93 | kfree(m); | |
94 | } | |
95 | ||
96 | static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | |
97 | { | |
98 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
99 | u32 index = key ? *(u32 *)key : U32_MAX; | |
100 | u32 *next = next_key; | |
101 | ||
102 | if (index >= m->map.max_entries) { | |
103 | *next = 0; | |
104 | return 0; | |
105 | } | |
106 | ||
107 | if (index == m->map.max_entries - 1) | |
108 | return -ENOENT; | |
109 | *next = index + 1; | |
110 | return 0; | |
111 | } | |
112 | ||
113 | struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key) | |
114 | { | |
115 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
116 | struct xdp_sock *xs; | |
117 | ||
118 | if (key >= map->max_entries) | |
119 | return NULL; | |
120 | ||
121 | xs = READ_ONCE(m->xsk_map[key]); | |
122 | return xs; | |
123 | } | |
124 | ||
125 | int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, | |
126 | struct xdp_sock *xs) | |
127 | { | |
128 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
129 | struct list_head *flush_list = this_cpu_ptr(m->flush_list); | |
130 | int err; | |
131 | ||
132 | err = xsk_rcv(xs, xdp); | |
133 | if (err) | |
134 | return err; | |
135 | ||
136 | if (!xs->flush_node.prev) | |
137 | list_add(&xs->flush_node, flush_list); | |
138 | ||
139 | return 0; | |
140 | } | |
141 | ||
142 | void __xsk_map_flush(struct bpf_map *map) | |
143 | { | |
144 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
145 | struct list_head *flush_list = this_cpu_ptr(m->flush_list); | |
146 | struct xdp_sock *xs, *tmp; | |
147 | ||
148 | list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { | |
149 | xsk_flush(xs); | |
150 | __list_del(xs->flush_node.prev, xs->flush_node.next); | |
151 | xs->flush_node.prev = NULL; | |
152 | } | |
153 | } | |
154 | ||
155 | static void *xsk_map_lookup_elem(struct bpf_map *map, void *key) | |
156 | { | |
3b4a63f6 | 157 | return ERR_PTR(-EOPNOTSUPP); |
fbfc504a BT |
158 | } |
159 | ||
160 | static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, | |
161 | u64 map_flags) | |
162 | { | |
163 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
164 | u32 i = *(u32 *)key, fd = *(u32 *)value; | |
165 | struct xdp_sock *xs, *old_xs; | |
166 | struct socket *sock; | |
167 | int err; | |
168 | ||
169 | if (unlikely(map_flags > BPF_EXIST)) | |
170 | return -EINVAL; | |
171 | if (unlikely(i >= m->map.max_entries)) | |
172 | return -E2BIG; | |
173 | if (unlikely(map_flags == BPF_NOEXIST)) | |
174 | return -EEXIST; | |
175 | ||
176 | sock = sockfd_lookup(fd, &err); | |
177 | if (!sock) | |
178 | return err; | |
179 | ||
180 | if (sock->sk->sk_family != PF_XDP) { | |
181 | sockfd_put(sock); | |
182 | return -EOPNOTSUPP; | |
183 | } | |
184 | ||
185 | xs = (struct xdp_sock *)sock->sk; | |
186 | ||
187 | if (!xsk_is_setup_for_bpf_map(xs)) { | |
188 | sockfd_put(sock); | |
189 | return -EOPNOTSUPP; | |
190 | } | |
191 | ||
192 | sock_hold(sock->sk); | |
193 | ||
194 | old_xs = xchg(&m->xsk_map[i], xs); | |
cee27167 | 195 | if (old_xs) |
fbfc504a | 196 | sock_put((struct sock *)old_xs); |
fbfc504a BT |
197 | |
198 | sockfd_put(sock); | |
199 | return 0; | |
200 | } | |
201 | ||
202 | static int xsk_map_delete_elem(struct bpf_map *map, void *key) | |
203 | { | |
204 | struct xsk_map *m = container_of(map, struct xsk_map, map); | |
205 | struct xdp_sock *old_xs; | |
206 | int k = *(u32 *)key; | |
207 | ||
208 | if (k >= map->max_entries) | |
209 | return -EINVAL; | |
210 | ||
211 | old_xs = xchg(&m->xsk_map[k], NULL); | |
cee27167 | 212 | if (old_xs) |
fbfc504a | 213 | sock_put((struct sock *)old_xs); |
fbfc504a BT |
214 | |
215 | return 0; | |
216 | } | |
217 | ||
218 | const struct bpf_map_ops xsk_map_ops = { | |
219 | .map_alloc = xsk_map_alloc, | |
220 | .map_free = xsk_map_free, | |
221 | .map_get_next_key = xsk_map_get_next_key, | |
222 | .map_lookup_elem = xsk_map_lookup_elem, | |
223 | .map_update_elem = xsk_map_update_elem, | |
224 | .map_delete_elem = xsk_map_delete_elem, | |
e8d2bec0 | 225 | .map_check_btf = map_check_no_btf, |
fbfc504a | 226 | }; |