/* SPDX-License-Identifier: GPL-2.0
 *
 * page_pool.c
 *	Author:	Jesper Dangaard Brouer <netoptimizer@brouer.com>
 *	Copyright (C) 2016 Red Hat, Inc.
 */

#include <linux/error-injection.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for put_page() */
#include <linux/poison.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>

#include <trace/events/page_pool.h>

#include "page_pool_priv.h"

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)

#define BIAS_MAX	(LONG_MAX >> 1)

#ifdef CONFIG_PAGE_POOL_STATS
static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats);

/* alloc_stat_inc is intended to be used in softirq context */
#define alloc_stat_inc(pool, __stat)	(pool->alloc_stats.__stat++)
/* recycle_stat_inc is safe to use when preemption is possible. */
#define recycle_stat_inc(pool, __stat)						\
	do {									\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
		this_cpu_inc(s->__stat);					\
	} while (0)

#define recycle_stat_add(pool, __stat, val)					\
	do {									\
		struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
		this_cpu_add(s->__stat, val);					\
	} while (0)

static const char pp_stats[][ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_ho",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
	"rx_pp_recycle_cached",
	"rx_pp_recycle_cache_full",
	"rx_pp_recycle_ring",
	"rx_pp_recycle_ring_full",
	"rx_pp_recycle_released_ref",
};

/**
 * page_pool_get_stats() - fetch page pool stats
 * @pool: pool from which pages were allocated
 * @stats: struct page_pool_stats to fill in
 *
 * Retrieve statistics about the page_pool. This API is only available
 * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
 * The caller passes a pointer to a caller-allocated struct
 * page_pool_stats, which this function fills in. The caller can then
 * report those stats to the user (perhaps via ethtool, debugfs, etc.).
 */
bool page_pool_get_stats(const struct page_pool *pool,
			 struct page_pool_stats *stats)
{
	int cpu = 0;

	if (!stats)
		return false;

	/* The caller is responsible for initializing stats. */
	stats->alloc_stats.fast += pool->alloc_stats.fast;
	stats->alloc_stats.slow += pool->alloc_stats.slow;
	stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
	stats->alloc_stats.empty += pool->alloc_stats.empty;
	stats->alloc_stats.refill += pool->alloc_stats.refill;
	stats->alloc_stats.waive += pool->alloc_stats.waive;

	for_each_possible_cpu(cpu) {
		const struct page_pool_recycle_stats *pcpu =
			per_cpu_ptr(pool->recycle_stats, cpu);

		stats->recycle_stats.cached += pcpu->cached;
		stats->recycle_stats.cache_full += pcpu->cache_full;
		stats->recycle_stats.ring += pcpu->ring;
		stats->recycle_stats.ring_full += pcpu->ring_full;
		stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
	}

	return true;
}
EXPORT_SYMBOL(page_pool_get_stats);
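
/* Example usage (illustrative sketch only; the priv/netdev variables are
 * hypothetical driver context, not part of this file):
 *
 *	struct page_pool_stats stats = {};	// caller must zero-initialize
 *
 *	if (page_pool_get_stats(priv->pool, &stats))
 *		netdev_info(priv->netdev, "pp fast allocs: %llu\n",
 *			    (unsigned long long)stats.alloc_stats.fast);
 */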

u8 *page_pool_ethtool_stats_get_strings(u8 *data)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
		memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
		data += ETH_GSTRING_LEN;
	}

	return data;
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);

int page_pool_ethtool_stats_get_count(void)
{
	return ARRAY_SIZE(pp_stats);
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);

u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
{
	const struct page_pool_stats *pool_stats = stats;

	*data++ = pool_stats->alloc_stats.fast;
	*data++ = pool_stats->alloc_stats.slow;
	*data++ = pool_stats->alloc_stats.slow_high_order;
	*data++ = pool_stats->alloc_stats.empty;
	*data++ = pool_stats->alloc_stats.refill;
	*data++ = pool_stats->alloc_stats.waive;
	*data++ = pool_stats->recycle_stats.cached;
	*data++ = pool_stats->recycle_stats.cache_full;
	*data++ = pool_stats->recycle_stats.ring;
	*data++ = pool_stats->recycle_stats.ring_full;
	*data++ = pool_stats->recycle_stats.released_refcnt;

	return data;
}
EXPORT_SYMBOL(page_pool_ethtool_stats_get);
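
/* Example: wiring the three helpers above into a driver's ethtool
 * callbacks (illustrative sketch; struct my_priv and its pool member are
 * hypothetical, not part of this API):
 *
 *	static int my_get_sset_count(struct net_device *dev, int sset)
 *	{
 *		return sset == ETH_SS_STATS ?
 *		       page_pool_ethtool_stats_get_count() : -EOPNOTSUPP;
 *	}
 *
 *	static void my_get_strings(struct net_device *dev, u32 sset, u8 *data)
 *	{
 *		if (sset == ETH_SS_STATS)
 *			data = page_pool_ethtool_stats_get_strings(data);
 *	}
 *
 *	static void my_get_stats(struct net_device *dev,
 *				 struct ethtool_stats *e, u64 *data)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *		struct page_pool_stats pp = {};
 *
 *		page_pool_get_stats(priv->pool, &pp);
 *		data = page_pool_ethtool_stats_get(data, &pp);
 *	}
 */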

#else
#define alloc_stat_inc(pool, __stat)
#define recycle_stat_inc(pool, __stat)
#define recycle_stat_add(pool, __stat, val)
#endif

static bool page_pool_producer_lock(struct page_pool *pool)
	__acquires(&pool->ring.producer_lock)
{
	bool in_softirq = in_softirq();

	if (in_softirq)
		spin_lock(&pool->ring.producer_lock);
	else
		spin_lock_bh(&pool->ring.producer_lock);

	return in_softirq;
}

static void page_pool_producer_unlock(struct page_pool *pool,
				      bool in_softirq)
	__releases(&pool->ring.producer_lock)
{
	if (in_softirq)
		spin_unlock(&pool->ring.producer_lock);
	else
		spin_unlock_bh(&pool->ring.producer_lock);
}

static void page_pool_struct_check(void)
{
	CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
	CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
	CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
	CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
}

static int page_pool_init(struct page_pool *pool,
			  const struct page_pool_params *params,
			  int cpuid)
{
	unsigned int ring_qsize = 1024; /* Default */

	page_pool_struct_check();

	memcpy(&pool->p, &params->fast, sizeof(pool->p));
	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));

	pool->cpuid = cpuid;

	/* Validate only known flags were used */
	if (pool->slow.flags & ~PP_FLAG_ALL)
		return -EINVAL;

	if (pool->p.pool_size)
		ring_qsize = pool->p.pool_size;

	/* Sanity limit mem that can be pinned down */
	if (ring_qsize > 32768)
		return -E2BIG;

	/* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
	 * DMA_BIDIRECTIONAL additionally allows the page to be used for
	 * DMA transmission, which is the XDP_TX use-case.
	 */
	if (pool->slow.flags & PP_FLAG_DMA_MAP) {
		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
			return -EINVAL;

		pool->dma_map = true;
	}

	if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
		/* In order to request DMA-sync-for-device the page
		 * needs to be mapped
		 */
		if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
			return -EINVAL;

		if (!pool->p.max_len)
			return -EINVAL;

		pool->dma_sync = true;

		/* pool->p.offset has to be set according to the address
		 * offset used by the DMA engine to start copying rx data
		 */
	}

	pool->has_init_callback = !!pool->slow.init_callback;

#ifdef CONFIG_PAGE_POOL_STATS
	if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
		pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
		if (!pool->recycle_stats)
			return -ENOMEM;
	} else {
		/* For system page pool instance we use a singular stats object
		 * instead of allocating a separate percpu variable for each
		 * (also percpu) page pool instance.
		 */
		pool->recycle_stats = &pp_system_recycle_stats;
		pool->system = true;
	}
#endif

	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
#ifdef CONFIG_PAGE_POOL_STATS
		if (!pool->system)
			free_percpu(pool->recycle_stats);
#endif
		return -ENOMEM;
	}

	atomic_set(&pool->pages_state_release_cnt, 0);

	/* A driver calling page_pool_create() must also call page_pool_destroy() */
	refcount_set(&pool->user_cnt, 1);

	if (pool->dma_map)
		get_device(pool->p.dev);

	return 0;
}

static void page_pool_uninit(struct page_pool *pool)
{
	ptr_ring_cleanup(&pool->ring, NULL);

	if (pool->dma_map)
		put_device(pool->p.dev);

#ifdef CONFIG_PAGE_POOL_STATS
	if (!pool->system)
		free_percpu(pool->recycle_stats);
#endif
}

/**
 * page_pool_create_percpu() - create a page pool for a given cpu.
 * @params: parameters, see struct page_pool_params
 * @cpuid: cpu identifier
 */
struct page_pool *
page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
{
	struct page_pool *pool;
	int err;

	pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
	if (!pool)
		return ERR_PTR(-ENOMEM);

	err = page_pool_init(pool, params, cpuid);
	if (err < 0)
		goto err_free;

	err = page_pool_list(pool);
	if (err)
		goto err_uninit;

	return pool;

err_uninit:
	page_pool_uninit(pool);
err_free:
	pr_warn("%s() gave up with errno %d\n", __func__, err);
	kfree(pool);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(page_pool_create_percpu);

/**
 * page_pool_create() - create a page pool
 * @params: parameters, see struct page_pool_params
 */
struct page_pool *page_pool_create(const struct page_pool_params *params)
{
	return page_pool_create_percpu(params, -1);
}
EXPORT_SYMBOL(page_pool_create);
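
/* Example: creating a DMA-mapping pool from a driver's RX setup path
 * (illustrative sketch; the sizes and the pdev variable are hypothetical):
 *
 *	struct page_pool_params pp_params = {
 *		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 *		.order		= 0,
 *		.pool_size	= 256,			// ptr_ring size
 *		.nid		= NUMA_NO_NODE,
 *		.dev		= &pdev->dev,		// device doing the DMA
 *		.dma_dir	= DMA_FROM_DEVICE,
 *		.max_len	= PAGE_SIZE,		// sync length for RX
 *		.offset		= 0,			// RX headroom, if any
 *	};
 *	struct page_pool *pool = page_pool_create(&pp_params);
 *
 *	if (IS_ERR(pool))
 *		return PTR_ERR(pool);
 */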

static void page_pool_return_page(struct page_pool *pool, struct page *page);

noinline
static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
{
	struct ptr_ring *r = &pool->ring;
	struct page *page;
	int pref_nid; /* preferred NUMA node */

	/* Quicker fallback, avoid locks when ring is empty */
	if (__ptr_ring_empty(r)) {
		alloc_stat_inc(pool, empty);
		return NULL;
	}

	/* Softirq guarantees the CPU, and thus the NUMA node, is stable.
	 * This assumes the CPU refilling the driver RX-ring will also run
	 * the RX-NAPI.
	 */
#ifdef CONFIG_NUMA
	pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
#else
	/* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
#endif

	/* Refill alloc array, but only if NUMA match */
	do {
		page = __ptr_ring_consume(r);
		if (unlikely(!page))
			break;

		if (likely(page_to_nid(page) == pref_nid)) {
			pool->alloc.cache[pool->alloc.count++] = page;
		} else {
			/* NUMA mismatch:
			 * (1) release 1 page to page-allocator and
			 * (2) break out to fall through to alloc_pages_node.
			 * This limits stress on the page buddy allocator.
			 */
			page_pool_return_page(pool, page);
			alloc_stat_inc(pool, waive);
			page = NULL;
			break;
		}
	} while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, refill);
	}

	return page;
}

/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
	struct page *page;

	/* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
	if (likely(pool->alloc.count)) {
		/* Fast-path */
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, fast);
	} else {
		page = page_pool_refill_alloc_cache(pool);
	}

	return page;
}

static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
					    const struct page *page,
					    u32 dma_sync_size)
{
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
	dma_addr_t dma_addr = page_pool_get_dma_addr(page);

	dma_sync_size = min(dma_sync_size, pool->p.max_len);
	__dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
				     dma_sync_size, pool->p.dma_dir);
#endif
}

static __always_inline void
page_pool_dma_sync_for_device(const struct page_pool *pool,
			      const struct page *page,
			      u32 dma_sync_size)
{
	if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
		__page_pool_dma_sync_for_device(pool, page, dma_sync_size);
}

static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;

	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
	 * since dma_addr_t can be either 32 or 64 bits and does not always
	 * fit into the page private data (i.e., a 32-bit CPU with 64-bit
	 * DMA capabilities).
	 * This mapping is kept for the lifetime of the page, until it
	 * leaves the pool.
	 */
	dma = dma_map_page_attrs(pool->p.dev, page, 0,
				 (PAGE_SIZE << pool->p.order),
				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
						  DMA_ATTR_WEAK_ORDERING);
	if (dma_mapping_error(pool->p.dev, dma))
		return false;

	if (page_pool_set_dma_addr(page, dma))
		goto unmap_failed;

	page_pool_dma_sync_for_device(pool, page, pool->p.max_len);

	return true;

unmap_failed:
	WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return false;
}

static void page_pool_set_pp_info(struct page_pool *pool,
				  struct page *page)
{
	page->pp = pool;
	page->pp_magic |= PP_SIGNATURE;

	/* Ensuring all pages have been split into one fragment initially:
	 * page_pool_set_pp_info() is only called once for every page when it
	 * is allocated from the page allocator and page_pool_fragment_page()
	 * is dirtying the same cache line as the page->pp_magic above, so
	 * the overhead is negligible.
	 */
	page_pool_fragment_page(page, 1);
	if (pool->has_init_callback)
		pool->slow.init_callback(page, pool->slow.init_arg);
}

static void page_pool_clear_pp_info(struct page *page)
{
	page->pp_magic = 0;
	page->pp = NULL;
}

static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
						 gfp_t gfp)
{
	struct page *page;

	gfp |= __GFP_COMP;
	page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
	if (unlikely(!page))
		return NULL;

	if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) {
		put_page(page);
		return NULL;
	}

	alloc_stat_inc(pool, slow_high_order);
	page_pool_set_pp_info(pool, page);

	/* Track how many pages are held 'in-flight' */
	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
	return page;
}

/* slow path */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
						 gfp_t gfp)
{
	const int bulk = PP_ALLOC_CACHE_REFILL;
	unsigned int pp_order = pool->p.order;
	bool dma_map = pool->dma_map;
	struct page *page;
	int i, nr_pages;

	/* Don't support bulk alloc for high-order pages */
	if (unlikely(pp_order))
		return __page_pool_alloc_page_order(pool, gfp);

	/* Unnecessary as alloc cache is empty, but guarantees zero count */
	if (unlikely(pool->alloc.count > 0))
		return pool->alloc.cache[--pool->alloc.count];

	/* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
	memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);

	nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
					       pool->alloc.cache);
	if (unlikely(!nr_pages))
		return NULL;

	/* Pages have been filled into the alloc.cache array, but the count
	 * is zero and the page elements have not (possibly) been DMA mapped.
	 */
	for (i = 0; i < nr_pages; i++) {
		page = pool->alloc.cache[i];
		if (dma_map && unlikely(!page_pool_dma_map(pool, page))) {
			put_page(page);
			continue;
		}

		page_pool_set_pp_info(pool, page);
		pool->alloc.cache[pool->alloc.count++] = page;
		/* Track how many pages are held 'in-flight' */
		pool->pages_state_hold_cnt++;
		trace_page_pool_state_hold(pool, page,
					   pool->pages_state_hold_cnt);
	}

	/* Return last page */
	if (likely(pool->alloc.count > 0)) {
		page = pool->alloc.cache[--pool->alloc.count];
		alloc_stat_inc(pool, slow);
	} else {
		page = NULL;
	}

	/* A page that was just allocated should/must have refcnt == 1. */
	return page;
}

/* For using page_pool to replace alloc_pages() API calls, while providing
 * a synchronization guarantee for the allocation side.
 */
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
{
	struct page *page;

	/* Fast-path: Get a page from cache */
	page = __page_pool_get_cached(pool);
	if (page)
		return page;

	/* Slow-path: cache empty, do real allocation */
	page = __page_pool_alloc_pages_slow(pool, gfp);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_pages);
ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL);
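
/* Example: allocating RX buffers from NAPI context (illustrative sketch;
 * the rxq/rx_desc layout is hypothetical driver state):
 *
 *	struct page *page = page_pool_alloc_pages(rxq->pool, GFP_ATOMIC);
 *
 *	if (unlikely(!page))
 *		return -ENOMEM;
 *	// With PP_FLAG_DMA_MAP the pool stored the mapping at alloc time
 *	rx_desc->addr = page_pool_get_dma_addr(page) + rxq->headroom;
 */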

/* Calculate distance between two u32 values, valid if distance is below 2^(31)
 * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
 */
#define _distance(a, b) (s32)((a) - (b))
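
/* Worked example of the wraparound-safe arithmetic: if hold_cnt has
 * wrapped to 0x00000001 while release_cnt is still 0xfffffffd, the plain
 * unsigned difference is huge, but (s32)(0x00000001 - 0xfffffffd) == 4,
 * i.e. four pages are still in flight. (Illustrative values only.)
 */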

s32 page_pool_inflight(const struct page_pool *pool, bool strict)
{
	u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
	u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
	s32 inflight;

	inflight = _distance(hold_cnt, release_cnt);

	if (strict) {
		trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
		WARN(inflight < 0, "Negative(%d) inflight packet-pages",
		     inflight);
	} else {
		inflight = max(0, inflight);
	}

	return inflight;
}

static __always_inline
void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
{
	dma_addr_t dma;

	if (!pool->dma_map)
		/* Always account for inflight pages, even if we didn't
		 * map them
		 */
		return;

	dma = page_pool_get_dma_addr(page);

	/* When page is unmapped, it cannot be returned to our pool */
	dma_unmap_page_attrs(pool->p.dev, dma,
			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	page_pool_set_dma_addr(page, 0);
}

/* Disconnects a page (from a page_pool). API users can have a need
 * to disconnect a page (from a page_pool), to allow it to be used as
 * a regular page (that will eventually be returned to the normal
 * page-allocator via put_page).
 */
void page_pool_return_page(struct page_pool *pool, struct page *page)
{
	int count;

	__page_pool_release_page_dma(pool, page);

	page_pool_clear_pp_info(page);

	/* This may be the last page returned, releasing the pool, so
	 * it is not safe to reference pool afterwards.
	 */
	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
	trace_page_pool_state_release(pool, page, count);

	put_page(page);
	/* An optimization would be to call __free_pages(page, pool->p.order)
	 * knowing page is not part of page-cache (thus avoiding a
	 * __page_cache_release() call).
	 */
}

static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
	int ret;

	/* BH protection not needed if current is softirq */
	if (in_softirq())
		ret = ptr_ring_produce(&pool->ring, page);
	else
		ret = ptr_ring_produce_bh(&pool->ring, page);

	if (!ret) {
		recycle_stat_inc(pool, ring);
		return true;
	}

	return false;
}

/* Only allow direct recycling in special circumstances, into the
 * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case.
 *
 * Caller must provide appropriate safe context.
 */
static bool page_pool_recycle_in_cache(struct page *page,
				       struct page_pool *pool)
{
	if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
		recycle_stat_inc(pool, cache_full);
		return false;
	}

	/* Caller MUST have verified/know (page_ref_count(page) == 1) */
	pool->alloc.cache[pool->alloc.count++] = page;
	recycle_stat_inc(pool, cached);
	return true;
}

static bool __page_pool_page_can_be_recycled(const struct page *page)
{
	return page_ref_count(page) == 1 && !page_is_pfmemalloc(page);
}

/* If the page refcnt == 1, this will try to recycle the page.
 * If pool->dma_sync is set, we'll try to sync the DMA area for
 * the configured size min(dma_sync_size, pool->max_len).
 * If the page refcnt != 1, then the page will be returned to the memory
 * subsystem.
 */
static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
		     unsigned int dma_sync_size, bool allow_direct)
{
	lockdep_assert_no_hardirq();

	/* This allocator is optimized for the XDP mode that uses
	 * one-frame-per-page, but has fallbacks that act like the
	 * regular page allocator APIs.
	 *
	 * refcnt == 1 means page_pool owns the page, and can recycle it.
	 *
	 * A page is NOT reusable when it was allocated while the system
	 * was under memory pressure (page_is_pfmemalloc()).
	 */
	if (likely(__page_pool_page_can_be_recycled(page))) {
		/* Read barrier done in page_ref_count / READ_ONCE */

		page_pool_dma_sync_for_device(pool, page, dma_sync_size);

		if (allow_direct && page_pool_recycle_in_cache(page, pool))
			return NULL;

		/* Page found as candidate for recycling */
		return page;
	}
	/* Fallback/non-XDP mode: the API user has an elevated refcnt.
	 *
	 * Many drivers split up the page into fragments, and some
	 * want to keep doing this to save memory and do refcnt based
	 * recycling. Support this use case too, to ease drivers
	 * switching between XDP/non-XDP.
	 *
	 * In case the page_pool maintains the DMA mapping, the API user
	 * must call page_pool_put_page() once. In this elevated refcnt
	 * case, the DMA mapping is unmapped/released, as the driver is
	 * likely doing refcnt based recycle tricks, meaning another
	 * process will be invoking put_page().
	 */
	recycle_stat_inc(pool, released_refcnt);
	page_pool_return_page(pool, page);

	return NULL;
}

static bool page_pool_napi_local(const struct page_pool *pool)
{
	const struct napi_struct *napi;
	u32 cpuid;

	if (unlikely(!in_softirq()))
		return false;

	/* Allow direct recycle if we have reasons to believe that we are
	 * in the same context as the consumer would run, so there's
	 * no possible race.
	 * __page_pool_put_page() makes sure we're not in hardirq context
	 * and interrupts are enabled prior to accessing the cache.
	 */
	cpuid = smp_processor_id();
	if (READ_ONCE(pool->cpuid) == cpuid)
		return true;

	napi = READ_ONCE(pool->p.napi);

	return napi && READ_ONCE(napi->list_owner) == cpuid;
}

void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
				unsigned int dma_sync_size, bool allow_direct)
{
	if (!allow_direct)
		allow_direct = page_pool_napi_local(pool);

	page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
	if (page && !page_pool_recycle_in_ring(pool, page)) {
		/* Cache full, fallback to free pages */
		recycle_stat_inc(pool, ring_full);
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_put_unrefed_page);
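
/* Example: returning a page on the RX path (illustrative sketch;
 * page_pool_put_full_page() is the helpers.h wrapper around this function
 * that passes -1 so the full pool->p.max_len area is synced for device):
 *
 *	if (verdict == XDP_DROP)
 *		page_pool_put_full_page(rxq->pool, page, true);
 */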

/**
 * page_pool_put_page_bulk() - release references on multiple pages
 * @pool: pool from which pages were allocated
 * @data: array holding page pointers
 * @count: number of pages in @data
 *
 * Tries to refill a number of pages into the ptr_ring cache while holding
 * the ptr_ring producer lock. If the ptr_ring is full,
 * page_pool_put_page_bulk() will release leftover pages to the page
 * allocator.
 * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx
 * completion loop for the XDP_REDIRECT use case.
 *
 * Please note the caller must not use the data array after running
 * page_pool_put_page_bulk(), as this function overwrites it.
 */
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
			     int count)
{
	int i, bulk_len = 0;
	bool allow_direct;
	bool in_softirq;

	allow_direct = page_pool_napi_local(pool);

	for (i = 0; i < count; i++) {
		struct page *page = virt_to_head_page(data[i]);

		/* It is not the last user for the page frag case */
		if (!page_pool_is_last_ref(page))
			continue;

		page = __page_pool_put_page(pool, page, -1, allow_direct);
		/* Approved for bulk recycling in ptr_ring cache */
		if (page)
			data[bulk_len++] = page;
	}

	if (!bulk_len)
		return;

	/* Bulk producer into ptr_ring page_pool cache */
	in_softirq = page_pool_producer_lock(pool);
	for (i = 0; i < bulk_len; i++) {
		if (__ptr_ring_produce(&pool->ring, data[i])) {
			/* ring full */
			recycle_stat_inc(pool, ring_full);
			break;
		}
	}
	recycle_stat_add(pool, ring, i);
	page_pool_producer_unlock(pool, in_softirq);

	/* Hopefully all pages were returned into the ptr_ring */
	if (likely(i == bulk_len))
		return;

	/* ptr_ring cache full, free remaining pages outside producer lock
	 * since put_page() with refcnt == 1 can be an expensive operation
	 */
	for (; i < bulk_len; i++)
		page_pool_return_page(pool, data[i]);
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
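
/* Example: batched returns from an XDP_REDIRECT tx-completion loop
 * (illustrative sketch; the frames[] array of xdp_frame pointers and the
 * completed count are hypothetical driver state):
 *
 *	void *data[8];
 *	int i, n = 0;
 *
 *	for (i = 0; i < completed; i++)
 *		data[n++] = frames[i]->data;	// kernel virtual addresses
 *	page_pool_put_page_bulk(pool, data, n);
 *	// data[] has been overwritten; do not reuse its contents
 */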

static struct page *page_pool_drain_frag(struct page_pool *pool,
					 struct page *page)
{
	long drain_count = BIAS_MAX - pool->frag_users;

	/* Some user is still using the page frag */
	if (likely(page_pool_unref_page(page, drain_count)))
		return NULL;

	if (__page_pool_page_can_be_recycled(page)) {
		page_pool_dma_sync_for_device(pool, page, -1);
		return page;
	}

	page_pool_return_page(pool, page);
	return NULL;
}

static void page_pool_free_frag(struct page_pool *pool)
{
	long drain_count = BIAS_MAX - pool->frag_users;
	struct page *page = pool->frag_page;

	pool->frag_page = NULL;

	if (!page || page_pool_unref_page(page, drain_count))
		return;

	page_pool_return_page(pool, page);
}

struct page *page_pool_alloc_frag(struct page_pool *pool,
				  unsigned int *offset,
				  unsigned int size, gfp_t gfp)
{
	unsigned int max_size = PAGE_SIZE << pool->p.order;
	struct page *page = pool->frag_page;

	if (WARN_ON(size > max_size))
		return NULL;

	size = ALIGN(size, dma_get_cache_alignment());
	*offset = pool->frag_offset;

	if (page && *offset + size > max_size) {
		page = page_pool_drain_frag(pool, page);
		if (page) {
			alloc_stat_inc(pool, fast);
			goto frag_reset;
		}
	}

	if (!page) {
		page = page_pool_alloc_pages(pool, gfp);
		if (unlikely(!page)) {
			pool->frag_page = NULL;
			return NULL;
		}

		pool->frag_page = page;

frag_reset:
		pool->frag_users = 1;
		*offset = 0;
		pool->frag_offset = size;
		page_pool_fragment_page(page, BIAS_MAX);
		return page;
	}

	pool->frag_users++;
	pool->frag_offset = *offset + size;
	alloc_stat_inc(pool, fast);
	return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
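
/* Example: carving small buffers out of one pooled page (illustrative
 * sketch; the 2048-byte fragment size is an arbitrary choice):
 *
 *	unsigned int offset;
 *	struct page *page;
 *	void *buf;
 *
 *	page = page_pool_alloc_frag(pool, &offset, 2048, GFP_ATOMIC);
 *	if (unlikely(!page))
 *		return -ENOMEM;
 *	buf = page_address(page) + offset;	// 2048 bytes usable at buf
 */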

static void page_pool_empty_ring(struct page_pool *pool)
{
	struct page *page;

	/* Empty recycle ring */
	while ((page = ptr_ring_consume_bh(&pool->ring))) {
		/* Verify the refcnt invariant of cached pages */
		if (page_ref_count(page) != 1)
			pr_crit("%s() page_pool refcnt %d violation\n",
				__func__, page_ref_count(page));

		page_pool_return_page(pool, page);
	}
}

static void __page_pool_destroy(struct page_pool *pool)
{
	if (pool->disconnect)
		pool->disconnect(pool);

	page_pool_unlist(pool);
	page_pool_uninit(pool);
	kfree(pool);
}

static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
	struct page *page;

	if (pool->destroy_cnt)
		return;

	/* Empty alloc cache, assume caller made sure this is
	 * no longer in use, and page_pool_alloc_pages() cannot be
	 * called concurrently.
	 */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}

static void page_pool_scrub(struct page_pool *pool)
{
	page_pool_empty_alloc_cache_once(pool);
	pool->destroy_cnt++;

	/* No more consumers should exist, but producers could still
	 * be in-flight.
	 */
	page_pool_empty_ring(pool);
}

static int page_pool_release(struct page_pool *pool)
{
	int inflight;

	page_pool_scrub(pool);
	inflight = page_pool_inflight(pool, true);
	if (!inflight)
		__page_pool_destroy(pool);

	return inflight;
}

static void page_pool_release_retry(struct work_struct *wq)
{
	struct delayed_work *dwq = to_delayed_work(wq);
	struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
	void *netdev;
	int inflight;

	inflight = page_pool_release(pool);
	if (!inflight)
		return;

	/* Periodic warning for page pools the user can't see */
	netdev = READ_ONCE(pool->slow.netdev);
	if (time_after_eq(jiffies, pool->defer_warn) &&
	    (!netdev || netdev == NET_PTR_POISON)) {
		int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;

		pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n",
			__func__, pool->user.id, inflight, sec);
		pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
	}

	/* Still not ready to be disconnected, retry later */
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}

void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
			   const struct xdp_mem_info *mem)
{
	refcount_inc(&pool->user_cnt);
	pool->disconnect = disconnect;
	pool->xdp_mem_id = mem->id;
}

static void page_pool_disable_direct_recycling(struct page_pool *pool)
{
	/* Disable direct recycling based on pool->cpuid.
	 * Paired with READ_ONCE() in page_pool_napi_local().
	 */
	WRITE_ONCE(pool->cpuid, -1);

	if (!pool->p.napi)
		return;

	/* To avoid races with recycling and additional barriers make sure
	 * pool and NAPI are unlinked when NAPI is disabled.
	 */
	WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state) ||
		READ_ONCE(pool->p.napi->list_owner) != -1);

	WRITE_ONCE(pool->p.napi, NULL);
}

void page_pool_destroy(struct page_pool *pool)
{
	if (!pool)
		return;

	if (!page_pool_put(pool))
		return;

	page_pool_disable_direct_recycling(pool);
	page_pool_free_frag(pool);

	if (!page_pool_release(pool))
		return;

	page_pool_detached(pool);
	pool->defer_start = jiffies;
	pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;

	INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);
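
/* Example: teardown from a driver's close/remove path (illustrative
 * sketch). Pages still held by in-flight skbs keep the pool alive; the
 * delayed work above retries until they are all returned:
 *
 *	page_pool_destroy(rxq->pool);	// NULL-safe
 *	rxq->pool = NULL;
 */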

/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
	struct page *page;

	trace_page_pool_update_nid(pool, new_nid);
	pool->p.nid = new_nid;

	/* Flush pool alloc cache, as refill will check NUMA node */
	while (pool->alloc.count) {
		page = pool->alloc.cache[--pool->alloc.count];
		page_pool_return_page(pool, page);
	}
}
EXPORT_SYMBOL(page_pool_update_nid);