/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ptlrpc/sec_bulk.c
 *
 * Author: Eric Mei <ericm@clusterfs.com>
 */
41 | #define DEBUG_SUBSYSTEM S_SEC | |
42 | ||
9fdaf8c0 | 43 | #include "../../include/linux/libcfs/libcfs.h" |
d7e09d03 PT |
44 | #include <linux/crypto.h> |
45 | ||
e27db149 GKH |
46 | #include "../include/obd.h" |
47 | #include "../include/obd_cksum.h" | |
48 | #include "../include/obd_class.h" | |
49 | #include "../include/obd_support.h" | |
50 | #include "../include/lustre_net.h" | |
51 | #include "../include/lustre_import.h" | |
52 | #include "../include/lustre_dlm.h" | |
53 | #include "../include/lustre_sec.h" | |
d7e09d03 PT |
54 | |
55 | #include "ptlrpc_internal.h" | |
56 | ||
57 | /**************************************** | |
58 | * bulk encryption page pools * | |
59 | ****************************************/ | |
60 | ||
61 | ||
ae18c5c6 AM |
62 | #define POINTERS_PER_PAGE (PAGE_CACHE_SIZE / sizeof(void *)) |
63 | #define PAGES_PER_POOL (POINTERS_PER_PAGE) | |
d7e09d03 | 64 | |
d0bfef31 | 65 | #define IDLE_IDX_MAX (100) |
d7e09d03 PT |
66 | #define IDLE_IDX_WEIGHT (3) |
67 | ||
68 | #define CACHE_QUIESCENT_PERIOD (20) | |
69 | ||
70 | static struct ptlrpc_enc_page_pool { | |
71 | /* | |
72 | * constants | |
73 | */ | |
74 | unsigned long epp_max_pages; /* maximum pages can hold, const */ | |
75 | unsigned int epp_max_pools; /* number of pools, const */ | |
76 | ||
77 | /* | |
78 | * wait queue in case of not enough free pages. | |
79 | */ | |
80 | wait_queue_head_t epp_waitq; /* waiting threads */ | |
81 | unsigned int epp_waitqlen; /* wait queue length */ | |
82 | unsigned long epp_pages_short; /* # of pages wanted of in-q users */ | |
83 | unsigned int epp_growing:1; /* during adding pages */ | |
84 | ||
85 | /* | |
86 | * indicating how idle the pools are, from 0 to MAX_IDLE_IDX | |
87 | * this is counted based on each time when getting pages from | |
88 | * the pools, not based on time. which means in case that system | |
89 | * is idled for a while but the idle_idx might still be low if no | |
90 | * activities happened in the pools. | |
91 | */ | |
92 | unsigned long epp_idle_idx; | |
93 | ||
94 | /* last shrink time due to mem tight */ | |
95 | long epp_last_shrink; | |
96 | long epp_last_access; | |
97 | ||
98 | /* | |
99 | * in-pool pages bookkeeping | |
100 | */ | |
101 | spinlock_t epp_lock; /* protect following fields */ | |
102 | unsigned long epp_total_pages; /* total pages in pools */ | |
103 | unsigned long epp_free_pages; /* current pages available */ | |
104 | ||
105 | /* | |
106 | * statistics | |
107 | */ | |
108 | unsigned long epp_st_max_pages; /* # of pages ever reached */ | |
109 | unsigned int epp_st_grows; /* # of grows */ | |
110 | unsigned int epp_st_grow_fails; /* # of add pages failures */ | |
111 | unsigned int epp_st_shrinks; /* # of shrinks */ | |
112 | unsigned long epp_st_access; /* # of access */ | |
113 | unsigned long epp_st_missings; /* # of cache missing */ | |
114 | unsigned long epp_st_lowfree; /* lowest free pages reached */ | |
115 | unsigned int epp_st_max_wqlen; /* highest waitqueue length */ | |
a649ad1d | 116 | unsigned long epp_st_max_wait; /* in jiffies */ |
d7e09d03 PT |
117 | /* |
118 | * pointers to pools | |
119 | */ | |
120 | struct page ***epp_pools; | |
121 | } page_pools; | |
122 | ||
d7e09d03 PT |
123 | /* |
124 | * /proc/fs/lustre/sptlrpc/encrypt_page_pools | |
125 | */ | |
73bb1da6 | 126 | int sptlrpc_proc_enc_pool_seq_show(struct seq_file *m, void *v) |
d7e09d03 | 127 | { |
d7e09d03 PT |
128 | spin_lock(&page_pools.epp_lock); |
129 | ||
91b3a685 JP |
130 | seq_printf(m, |
131 | "physical pages: %lu\n" | |
132 | "pages per pool: %lu\n" | |
133 | "max pages: %lu\n" | |
134 | "max pools: %u\n" | |
135 | "total pages: %lu\n" | |
136 | "total free: %lu\n" | |
137 | "idle index: %lu/100\n" | |
138 | "last shrink: %lds\n" | |
139 | "last access: %lds\n" | |
140 | "max pages reached: %lu\n" | |
141 | "grows: %u\n" | |
142 | "grows failure: %u\n" | |
143 | "shrinks: %u\n" | |
144 | "cache access: %lu\n" | |
145 | "cache missing: %lu\n" | |
146 | "low free mark: %lu\n" | |
147 | "max waitqueue depth: %u\n" | |
148 | "max wait time: " CFS_TIME_T "/%u\n", | |
149 | totalram_pages, | |
150 | PAGES_PER_POOL, | |
151 | page_pools.epp_max_pages, | |
152 | page_pools.epp_max_pools, | |
153 | page_pools.epp_total_pages, | |
154 | page_pools.epp_free_pages, | |
155 | page_pools.epp_idle_idx, | |
156 | get_seconds() - page_pools.epp_last_shrink, | |
157 | get_seconds() - page_pools.epp_last_access, | |
158 | page_pools.epp_st_max_pages, | |
159 | page_pools.epp_st_grows, | |
160 | page_pools.epp_st_grow_fails, | |
161 | page_pools.epp_st_shrinks, | |
162 | page_pools.epp_st_access, | |
163 | page_pools.epp_st_missings, | |
164 | page_pools.epp_st_lowfree, | |
165 | page_pools.epp_st_max_wqlen, | |
166 | page_pools.epp_st_max_wait, | |
167 | HZ); | |
d7e09d03 PT |
168 | |
169 | spin_unlock(&page_pools.epp_lock); | |
91b3a685 JP |
170 | |
171 | return 0; | |
d7e09d03 PT |
172 | } |
173 | ||
174 | static void enc_pools_release_free_pages(long npages) | |
175 | { | |
d0bfef31 CH |
176 | int p_idx, g_idx; |
177 | int p_idx_max1, p_idx_max2; | |
d7e09d03 PT |
178 | |
179 | LASSERT(npages > 0); | |
180 | LASSERT(npages <= page_pools.epp_free_pages); | |
181 | LASSERT(page_pools.epp_free_pages <= page_pools.epp_total_pages); | |
182 | ||
183 | /* max pool index before the release */ | |
184 | p_idx_max2 = (page_pools.epp_total_pages - 1) / PAGES_PER_POOL; | |
185 | ||
186 | page_pools.epp_free_pages -= npages; | |
187 | page_pools.epp_total_pages -= npages; | |
188 | ||
189 | /* max pool index after the release */ | |
190 | p_idx_max1 = page_pools.epp_total_pages == 0 ? -1 : | |
191 | ((page_pools.epp_total_pages - 1) / PAGES_PER_POOL); | |
192 | ||
193 | p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
194 | g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
195 | LASSERT(page_pools.epp_pools[p_idx]); | |
196 | ||
197 | while (npages--) { | |
198 | LASSERT(page_pools.epp_pools[p_idx]); | |
199 | LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); | |
200 | ||
201 | __free_page(page_pools.epp_pools[p_idx][g_idx]); | |
202 | page_pools.epp_pools[p_idx][g_idx] = NULL; | |
203 | ||
204 | if (++g_idx == PAGES_PER_POOL) { | |
205 | p_idx++; | |
206 | g_idx = 0; | |
207 | } | |
9076b09e | 208 | } |
d7e09d03 PT |
209 | |
210 | /* free unused pools */ | |
211 | while (p_idx_max1 < p_idx_max2) { | |
212 | LASSERT(page_pools.epp_pools[p_idx_max2]); | |
9ae10597 | 213 | kfree(page_pools.epp_pools[p_idx_max2]); |
d7e09d03 PT |
214 | page_pools.epp_pools[p_idx_max2] = NULL; |
215 | p_idx_max2--; | |
216 | } | |
217 | } | |
218 | ||
219 | /* | |
d7e09d03 PT |
220 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. |
221 | */ | |
3bb22ec5 PT |
222 | static unsigned long enc_pools_shrink_count(struct shrinker *s, |
223 | struct shrink_control *sc) | |
d7e09d03 | 224 | { |
3bb22ec5 PT |
225 | /* |
226 | * if no pool access for a long time, we consider it's fully idle. | |
227 | * a little race here is fine. | |
228 | */ | |
7264b8a5 | 229 | if (unlikely(get_seconds() - page_pools.epp_last_access > |
3bb22ec5 | 230 | CACHE_QUIESCENT_PERIOD)) { |
d7e09d03 | 231 | spin_lock(&page_pools.epp_lock); |
3bb22ec5 | 232 | page_pools.epp_idle_idx = IDLE_IDX_MAX; |
d7e09d03 PT |
233 | spin_unlock(&page_pools.epp_lock); |
234 | } | |
235 | ||
3bb22ec5 PT |
236 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); |
237 | return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) * | |
238 | (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX; | |
239 | } | |
240 | ||
241 | /* | |
242 | * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool. | |
243 | */ | |
244 | static unsigned long enc_pools_shrink_scan(struct shrinker *s, | |
245 | struct shrink_control *sc) | |
246 | { | |
247 | spin_lock(&page_pools.epp_lock); | |
248 | sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan, | |
249 | page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES); | |
250 | if (sc->nr_to_scan > 0) { | |
251 | enc_pools_release_free_pages(sc->nr_to_scan); | |
252 | CDEBUG(D_SEC, "released %ld pages, %ld left\n", | |
253 | (long)sc->nr_to_scan, page_pools.epp_free_pages); | |
254 | ||
255 | page_pools.epp_st_shrinks++; | |
7264b8a5 | 256 | page_pools.epp_last_shrink = get_seconds(); |
3bb22ec5 PT |
257 | } |
258 | spin_unlock(&page_pools.epp_lock); | |
259 | ||
d7e09d03 PT |
260 | /* |
261 | * if no pool access for a long time, we consider it's fully idle. | |
262 | * a little race here is fine. | |
263 | */ | |
7264b8a5 | 264 | if (unlikely(get_seconds() - page_pools.epp_last_access > |
d7e09d03 PT |
265 | CACHE_QUIESCENT_PERIOD)) { |
266 | spin_lock(&page_pools.epp_lock); | |
267 | page_pools.epp_idle_idx = IDLE_IDX_MAX; | |
268 | spin_unlock(&page_pools.epp_lock); | |
269 | } | |
270 | ||
271 | LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX); | |
3bb22ec5 | 272 | return sc->nr_to_scan; |
d7e09d03 PT |
273 | } |
274 | ||
275 | static inline | |
276 | int npages_to_npools(unsigned long npages) | |
277 | { | |
278 | return (int) ((npages + PAGES_PER_POOL - 1) / PAGES_PER_POOL); | |
279 | } | |
280 | ||
281 | /* | |
282 | * return how many pages cleaned up. | |
283 | */ | |
284 | static unsigned long enc_pools_cleanup(struct page ***pools, int npools) | |
285 | { | |
286 | unsigned long cleaned = 0; | |
d0bfef31 | 287 | int i, j; |
d7e09d03 PT |
288 | |
289 | for (i = 0; i < npools; i++) { | |
290 | if (pools[i]) { | |
291 | for (j = 0; j < PAGES_PER_POOL; j++) { | |
292 | if (pools[i][j]) { | |
293 | __free_page(pools[i][j]); | |
294 | cleaned++; | |
295 | } | |
296 | } | |
9ae10597 | 297 | kfree(pools[i]); |
d7e09d03 PT |
298 | pools[i] = NULL; |
299 | } | |
300 | } | |
301 | ||
302 | return cleaned; | |
303 | } | |
304 | ||
305 | /* | |
306 | * merge @npools pointed by @pools which contains @npages new pages | |
307 | * into current pools. | |
308 | * | |
309 | * we have options to avoid most memory copy with some tricks. but we choose | |
310 | * the simplest way to avoid complexity. It's not frequently called. | |
311 | */ | |
312 | static void enc_pools_insert(struct page ***pools, int npools, int npages) | |
313 | { | |
d0bfef31 CH |
314 | int freeslot; |
315 | int op_idx, np_idx, og_idx, ng_idx; | |
316 | int cur_npools, end_npools; | |
d7e09d03 PT |
317 | |
318 | LASSERT(npages > 0); | |
319 | LASSERT(page_pools.epp_total_pages+npages <= page_pools.epp_max_pages); | |
320 | LASSERT(npages_to_npools(npages) == npools); | |
321 | LASSERT(page_pools.epp_growing); | |
322 | ||
323 | spin_lock(&page_pools.epp_lock); | |
324 | ||
325 | /* | |
326 | * (1) fill all the free slots of current pools. | |
327 | */ | |
328 | /* free slots are those left by rent pages, and the extra ones with | |
329 | * index >= total_pages, locate at the tail of last pool. */ | |
330 | freeslot = page_pools.epp_total_pages % PAGES_PER_POOL; | |
331 | if (freeslot != 0) | |
332 | freeslot = PAGES_PER_POOL - freeslot; | |
333 | freeslot += page_pools.epp_total_pages - page_pools.epp_free_pages; | |
334 | ||
335 | op_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
336 | og_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
337 | np_idx = npools - 1; | |
338 | ng_idx = (npages - 1) % PAGES_PER_POOL; | |
339 | ||
340 | while (freeslot) { | |
341 | LASSERT(page_pools.epp_pools[op_idx][og_idx] == NULL); | |
342 | LASSERT(pools[np_idx][ng_idx] != NULL); | |
343 | ||
344 | page_pools.epp_pools[op_idx][og_idx] = pools[np_idx][ng_idx]; | |
345 | pools[np_idx][ng_idx] = NULL; | |
346 | ||
347 | freeslot--; | |
348 | ||
349 | if (++og_idx == PAGES_PER_POOL) { | |
350 | op_idx++; | |
351 | og_idx = 0; | |
352 | } | |
353 | if (--ng_idx < 0) { | |
354 | if (np_idx == 0) | |
355 | break; | |
356 | np_idx--; | |
357 | ng_idx = PAGES_PER_POOL - 1; | |
358 | } | |
359 | } | |
360 | ||
361 | /* | |
362 | * (2) add pools if needed. | |
363 | */ | |
364 | cur_npools = (page_pools.epp_total_pages + PAGES_PER_POOL - 1) / | |
365 | PAGES_PER_POOL; | |
9f0d9bfa GV |
366 | end_npools = (page_pools.epp_total_pages + npages + PAGES_PER_POOL - 1) |
367 | / PAGES_PER_POOL; | |
d7e09d03 PT |
368 | LASSERT(end_npools <= page_pools.epp_max_pools); |
369 | ||
370 | np_idx = 0; | |
371 | while (cur_npools < end_npools) { | |
372 | LASSERT(page_pools.epp_pools[cur_npools] == NULL); | |
373 | LASSERT(np_idx < npools); | |
374 | LASSERT(pools[np_idx] != NULL); | |
375 | ||
376 | page_pools.epp_pools[cur_npools++] = pools[np_idx]; | |
377 | pools[np_idx++] = NULL; | |
378 | } | |
379 | ||
380 | page_pools.epp_total_pages += npages; | |
381 | page_pools.epp_free_pages += npages; | |
382 | page_pools.epp_st_lowfree = page_pools.epp_free_pages; | |
383 | ||
384 | if (page_pools.epp_total_pages > page_pools.epp_st_max_pages) | |
385 | page_pools.epp_st_max_pages = page_pools.epp_total_pages; | |
386 | ||
387 | CDEBUG(D_SEC, "add %d pages to total %lu\n", npages, | |
388 | page_pools.epp_total_pages); | |
389 | ||
390 | spin_unlock(&page_pools.epp_lock); | |
391 | } | |
392 | ||
393 | static int enc_pools_add_pages(int npages) | |
394 | { | |
395 | static DEFINE_MUTEX(add_pages_mutex); | |
d0bfef31 CH |
396 | struct page ***pools; |
397 | int npools, alloced = 0; | |
398 | int i, j, rc = -ENOMEM; | |
d7e09d03 PT |
399 | |
400 | if (npages < PTLRPC_MAX_BRW_PAGES) | |
401 | npages = PTLRPC_MAX_BRW_PAGES; | |
402 | ||
403 | mutex_lock(&add_pages_mutex); | |
404 | ||
405 | if (npages + page_pools.epp_total_pages > page_pools.epp_max_pages) | |
406 | npages = page_pools.epp_max_pages - page_pools.epp_total_pages; | |
407 | LASSERT(npages > 0); | |
408 | ||
409 | page_pools.epp_st_grows++; | |
410 | ||
411 | npools = npages_to_npools(npages); | |
9ae10597 | 412 | pools = kcalloc(npools, sizeof(*pools), GFP_NOFS); |
d7e09d03 PT |
413 | if (pools == NULL) |
414 | goto out; | |
415 | ||
416 | for (i = 0; i < npools; i++) { | |
9ae10597 | 417 | pools[i] = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
597851ac | 418 | if (!pools[i]) |
d7e09d03 PT |
419 | goto out_pools; |
420 | ||
421 | for (j = 0; j < PAGES_PER_POOL && alloced < npages; j++) { | |
0be19afa | 422 | pools[i][j] = alloc_page(GFP_NOFS | |
d7e09d03 PT |
423 | __GFP_HIGHMEM); |
424 | if (pools[i][j] == NULL) | |
425 | goto out_pools; | |
426 | ||
427 | alloced++; | |
428 | } | |
429 | } | |
430 | LASSERT(alloced == npages); | |
431 | ||
432 | enc_pools_insert(pools, npools, npages); | |
433 | CDEBUG(D_SEC, "added %d pages into pools\n", npages); | |
434 | rc = 0; | |
435 | ||
436 | out_pools: | |
437 | enc_pools_cleanup(pools, npools); | |
9ae10597 | 438 | kfree(pools); |
d7e09d03 PT |
439 | out: |
440 | if (rc) { | |
441 | page_pools.epp_st_grow_fails++; | |
442 | CERROR("Failed to allocate %d enc pages\n", npages); | |
443 | } | |
444 | ||
445 | mutex_unlock(&add_pages_mutex); | |
446 | return rc; | |
447 | } | |
448 | ||
449 | static inline void enc_pools_wakeup(void) | |
450 | { | |
5e42bc9d | 451 | assert_spin_locked(&page_pools.epp_lock); |
d7e09d03 PT |
452 | LASSERT(page_pools.epp_waitqlen >= 0); |
453 | ||
454 | if (unlikely(page_pools.epp_waitqlen)) { | |
455 | LASSERT(waitqueue_active(&page_pools.epp_waitq)); | |
456 | wake_up_all(&page_pools.epp_waitq); | |
457 | } | |
458 | } | |
459 | ||
460 | static int enc_pools_should_grow(int page_needed, long now) | |
461 | { | |
462 | /* don't grow if someone else is growing the pools right now, | |
463 | * or the pools has reached its full capacity | |
464 | */ | |
465 | if (page_pools.epp_growing || | |
466 | page_pools.epp_total_pages == page_pools.epp_max_pages) | |
467 | return 0; | |
468 | ||
469 | /* if total pages is not enough, we need to grow */ | |
470 | if (page_pools.epp_total_pages < page_needed) | |
471 | return 1; | |
472 | ||
473 | /* | |
474 | * we wanted to return 0 here if there was a shrink just happened | |
475 | * moment ago, but this may cause deadlock if both client and ost | |
476 | * live on single node. | |
477 | */ | |
478 | #if 0 | |
479 | if (now - page_pools.epp_last_shrink < 2) | |
480 | return 0; | |
481 | #endif | |
482 | ||
483 | /* | |
484 | * here we perhaps need consider other factors like wait queue | |
485 | * length, idle index, etc. ? | |
486 | */ | |
487 | ||
488 | /* grow the pools in any other cases */ | |
489 | return 1; | |
490 | } | |
491 | ||
492 | /* | |
493 | * we allocate the requested pages atomically. | |
494 | */ | |
495 | int sptlrpc_enc_pool_get_pages(struct ptlrpc_bulk_desc *desc) | |
496 | { | |
d0bfef31 CH |
497 | wait_queue_t waitlink; |
498 | unsigned long this_idle = -1; | |
499 | unsigned long tick = 0; | |
500 | long now; | |
501 | int p_idx, g_idx; | |
502 | int i; | |
d7e09d03 PT |
503 | |
504 | LASSERT(desc->bd_iov_count > 0); | |
505 | LASSERT(desc->bd_iov_count <= page_pools.epp_max_pages); | |
506 | ||
507 | /* resent bulk, enc iov might have been allocated previously */ | |
508 | if (desc->bd_enc_iov != NULL) | |
509 | return 0; | |
510 | ||
9ae10597 JL |
511 | desc->bd_enc_iov = kcalloc(desc->bd_iov_count, |
512 | sizeof(*desc->bd_enc_iov), GFP_NOFS); | |
d7e09d03 PT |
513 | if (desc->bd_enc_iov == NULL) |
514 | return -ENOMEM; | |
515 | ||
516 | spin_lock(&page_pools.epp_lock); | |
517 | ||
518 | page_pools.epp_st_access++; | |
519 | again: | |
520 | if (unlikely(page_pools.epp_free_pages < desc->bd_iov_count)) { | |
521 | if (tick == 0) | |
522 | tick = cfs_time_current(); | |
523 | ||
7264b8a5 | 524 | now = get_seconds(); |
d7e09d03 PT |
525 | |
526 | page_pools.epp_st_missings++; | |
527 | page_pools.epp_pages_short += desc->bd_iov_count; | |
528 | ||
529 | if (enc_pools_should_grow(desc->bd_iov_count, now)) { | |
530 | page_pools.epp_growing = 1; | |
531 | ||
532 | spin_unlock(&page_pools.epp_lock); | |
533 | enc_pools_add_pages(page_pools.epp_pages_short / 2); | |
534 | spin_lock(&page_pools.epp_lock); | |
535 | ||
536 | page_pools.epp_growing = 0; | |
537 | ||
538 | enc_pools_wakeup(); | |
539 | } else { | |
540 | if (++page_pools.epp_waitqlen > | |
541 | page_pools.epp_st_max_wqlen) | |
542 | page_pools.epp_st_max_wqlen = | |
543 | page_pools.epp_waitqlen; | |
544 | ||
545 | set_current_state(TASK_UNINTERRUPTIBLE); | |
9e795d35 | 546 | init_waitqueue_entry(&waitlink, current); |
d7e09d03 PT |
547 | add_wait_queue(&page_pools.epp_waitq, &waitlink); |
548 | ||
549 | spin_unlock(&page_pools.epp_lock); | |
b3669a7f | 550 | schedule(); |
d7e09d03 PT |
551 | remove_wait_queue(&page_pools.epp_waitq, &waitlink); |
552 | LASSERT(page_pools.epp_waitqlen > 0); | |
553 | spin_lock(&page_pools.epp_lock); | |
554 | page_pools.epp_waitqlen--; | |
555 | } | |
556 | ||
557 | LASSERT(page_pools.epp_pages_short >= desc->bd_iov_count); | |
558 | page_pools.epp_pages_short -= desc->bd_iov_count; | |
559 | ||
560 | this_idle = 0; | |
561 | goto again; | |
562 | } | |
563 | ||
564 | /* record max wait time */ | |
565 | if (unlikely(tick != 0)) { | |
566 | tick = cfs_time_current() - tick; | |
567 | if (tick > page_pools.epp_st_max_wait) | |
568 | page_pools.epp_st_max_wait = tick; | |
569 | } | |
570 | ||
571 | /* proceed with rest of allocation */ | |
572 | page_pools.epp_free_pages -= desc->bd_iov_count; | |
573 | ||
574 | p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
575 | g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
576 | ||
577 | for (i = 0; i < desc->bd_iov_count; i++) { | |
578 | LASSERT(page_pools.epp_pools[p_idx][g_idx] != NULL); | |
579 | desc->bd_enc_iov[i].kiov_page = | |
580 | page_pools.epp_pools[p_idx][g_idx]; | |
581 | page_pools.epp_pools[p_idx][g_idx] = NULL; | |
582 | ||
583 | if (++g_idx == PAGES_PER_POOL) { | |
584 | p_idx++; | |
585 | g_idx = 0; | |
586 | } | |
587 | } | |
588 | ||
589 | if (page_pools.epp_free_pages < page_pools.epp_st_lowfree) | |
590 | page_pools.epp_st_lowfree = page_pools.epp_free_pages; | |
591 | ||
592 | /* | |
593 | * new idle index = (old * weight + new) / (weight + 1) | |
594 | */ | |
595 | if (this_idle == -1) { | |
596 | this_idle = page_pools.epp_free_pages * IDLE_IDX_MAX / | |
597 | page_pools.epp_total_pages; | |
598 | } | |
599 | page_pools.epp_idle_idx = (page_pools.epp_idle_idx * IDLE_IDX_WEIGHT + | |
600 | this_idle) / | |
601 | (IDLE_IDX_WEIGHT + 1); | |
602 | ||
7264b8a5 | 603 | page_pools.epp_last_access = get_seconds(); |
d7e09d03 PT |
604 | |
605 | spin_unlock(&page_pools.epp_lock); | |
606 | return 0; | |
607 | } | |
608 | EXPORT_SYMBOL(sptlrpc_enc_pool_get_pages); | |
609 | ||
610 | void sptlrpc_enc_pool_put_pages(struct ptlrpc_bulk_desc *desc) | |
611 | { | |
d0bfef31 CH |
612 | int p_idx, g_idx; |
613 | int i; | |
d7e09d03 PT |
614 | |
615 | if (desc->bd_enc_iov == NULL) | |
616 | return; | |
617 | ||
618 | LASSERT(desc->bd_iov_count > 0); | |
619 | ||
620 | spin_lock(&page_pools.epp_lock); | |
621 | ||
622 | p_idx = page_pools.epp_free_pages / PAGES_PER_POOL; | |
623 | g_idx = page_pools.epp_free_pages % PAGES_PER_POOL; | |
624 | ||
625 | LASSERT(page_pools.epp_free_pages + desc->bd_iov_count <= | |
626 | page_pools.epp_total_pages); | |
627 | LASSERT(page_pools.epp_pools[p_idx]); | |
628 | ||
629 | for (i = 0; i < desc->bd_iov_count; i++) { | |
630 | LASSERT(desc->bd_enc_iov[i].kiov_page != NULL); | |
631 | LASSERT(g_idx != 0 || page_pools.epp_pools[p_idx]); | |
632 | LASSERT(page_pools.epp_pools[p_idx][g_idx] == NULL); | |
633 | ||
634 | page_pools.epp_pools[p_idx][g_idx] = | |
635 | desc->bd_enc_iov[i].kiov_page; | |
636 | ||
637 | if (++g_idx == PAGES_PER_POOL) { | |
638 | p_idx++; | |
639 | g_idx = 0; | |
640 | } | |
641 | } | |
642 | ||
643 | page_pools.epp_free_pages += desc->bd_iov_count; | |
644 | ||
645 | enc_pools_wakeup(); | |
646 | ||
647 | spin_unlock(&page_pools.epp_lock); | |
648 | ||
9ae10597 | 649 | kfree(desc->bd_enc_iov); |
d7e09d03 PT |
650 | desc->bd_enc_iov = NULL; |
651 | } | |
652 | EXPORT_SYMBOL(sptlrpc_enc_pool_put_pages); | |
653 | ||
654 | /* | |
655 | * we don't do much stuff for add_user/del_user anymore, except adding some | |
656 | * initial pages in add_user() if current pools are empty, rest would be | |
657 | * handled by the pools's self-adaption. | |
658 | */ | |
659 | int sptlrpc_enc_pool_add_user(void) | |
660 | { | |
d0bfef31 | 661 | int need_grow = 0; |
d7e09d03 PT |
662 | |
663 | spin_lock(&page_pools.epp_lock); | |
664 | if (page_pools.epp_growing == 0 && page_pools.epp_total_pages == 0) { | |
665 | page_pools.epp_growing = 1; | |
666 | need_grow = 1; | |
667 | } | |
668 | spin_unlock(&page_pools.epp_lock); | |
669 | ||
670 | if (need_grow) { | |
671 | enc_pools_add_pages(PTLRPC_MAX_BRW_PAGES + | |
672 | PTLRPC_MAX_BRW_PAGES); | |
673 | ||
674 | spin_lock(&page_pools.epp_lock); | |
675 | page_pools.epp_growing = 0; | |
676 | enc_pools_wakeup(); | |
677 | spin_unlock(&page_pools.epp_lock); | |
678 | } | |
679 | return 0; | |
680 | } | |
681 | EXPORT_SYMBOL(sptlrpc_enc_pool_add_user); | |
682 | ||
/* intentionally a no-op: pool sizing is fully self-adaptive now */
int sptlrpc_enc_pool_del_user(void)
{
	return 0;
}
EXPORT_SYMBOL(sptlrpc_enc_pool_del_user);
688 | ||
689 | static inline void enc_pools_alloc(void) | |
690 | { | |
691 | LASSERT(page_pools.epp_max_pools); | |
ee0ec194 JL |
692 | page_pools.epp_pools = |
693 | libcfs_kvzalloc(page_pools.epp_max_pools * | |
694 | sizeof(*page_pools.epp_pools), | |
695 | GFP_NOFS); | |
d7e09d03 PT |
696 | } |
697 | ||
698 | static inline void enc_pools_free(void) | |
699 | { | |
700 | LASSERT(page_pools.epp_max_pools); | |
701 | LASSERT(page_pools.epp_pools); | |
702 | ||
ee0ec194 | 703 | kvfree(page_pools.epp_pools); |
d7e09d03 PT |
704 | } |
705 | ||
3bb22ec5 PT |
706 | static struct shrinker pools_shrinker = { |
707 | .count_objects = enc_pools_shrink_count, | |
708 | .scan_objects = enc_pools_shrink_scan, | |
709 | .seeks = DEFAULT_SEEKS, | |
710 | }; | |
711 | ||
d7e09d03 PT |
712 | int sptlrpc_enc_pool_init(void) |
713 | { | |
714 | /* | |
715 | * maximum capacity is 1/8 of total physical memory. | |
716 | * is the 1/8 a good number? | |
717 | */ | |
4f6cc9ab | 718 | page_pools.epp_max_pages = totalram_pages / 8; |
d7e09d03 PT |
719 | page_pools.epp_max_pools = npages_to_npools(page_pools.epp_max_pages); |
720 | ||
721 | init_waitqueue_head(&page_pools.epp_waitq); | |
722 | page_pools.epp_waitqlen = 0; | |
723 | page_pools.epp_pages_short = 0; | |
724 | ||
725 | page_pools.epp_growing = 0; | |
726 | ||
727 | page_pools.epp_idle_idx = 0; | |
7264b8a5 GKH |
728 | page_pools.epp_last_shrink = get_seconds(); |
729 | page_pools.epp_last_access = get_seconds(); | |
d7e09d03 PT |
730 | |
731 | spin_lock_init(&page_pools.epp_lock); | |
732 | page_pools.epp_total_pages = 0; | |
733 | page_pools.epp_free_pages = 0; | |
734 | ||
735 | page_pools.epp_st_max_pages = 0; | |
736 | page_pools.epp_st_grows = 0; | |
737 | page_pools.epp_st_grow_fails = 0; | |
738 | page_pools.epp_st_shrinks = 0; | |
739 | page_pools.epp_st_access = 0; | |
740 | page_pools.epp_st_missings = 0; | |
741 | page_pools.epp_st_lowfree = 0; | |
742 | page_pools.epp_st_max_wqlen = 0; | |
743 | page_pools.epp_st_max_wait = 0; | |
744 | ||
745 | enc_pools_alloc(); | |
746 | if (page_pools.epp_pools == NULL) | |
747 | return -ENOMEM; | |
748 | ||
3bb22ec5 | 749 | register_shrinker(&pools_shrinker); |
d7e09d03 PT |
750 | |
751 | return 0; | |
752 | } | |
753 | ||
754 | void sptlrpc_enc_pool_fini(void) | |
755 | { | |
756 | unsigned long cleaned, npools; | |
757 | ||
d7e09d03 PT |
758 | LASSERT(page_pools.epp_pools); |
759 | LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages); | |
760 | ||
3bb22ec5 | 761 | unregister_shrinker(&pools_shrinker); |
d7e09d03 PT |
762 | |
763 | npools = npages_to_npools(page_pools.epp_total_pages); | |
764 | cleaned = enc_pools_cleanup(page_pools.epp_pools, npools); | |
765 | LASSERT(cleaned == page_pools.epp_total_pages); | |
766 | ||
767 | enc_pools_free(); | |
768 | ||
769 | if (page_pools.epp_st_access > 0) { | |
770 | CDEBUG(D_SEC, | |
2d00bd17 | 771 | "max pages %lu, grows %u, grow fails %u, shrinks %u, access %lu, missing %lu, max qlen %u, max wait " |
d7e09d03 PT |
772 | CFS_TIME_T"/%d\n", |
773 | page_pools.epp_st_max_pages, page_pools.epp_st_grows, | |
774 | page_pools.epp_st_grow_fails, | |
775 | page_pools.epp_st_shrinks, page_pools.epp_st_access, | |
776 | page_pools.epp_st_missings, page_pools.epp_st_max_wqlen, | |
777 | page_pools.epp_st_max_wait, HZ); | |
778 | } | |
779 | } | |
780 | ||
781 | ||
782 | static int cfs_hash_alg_id[] = { | |
783 | [BULK_HASH_ALG_NULL] = CFS_HASH_ALG_NULL, | |
784 | [BULK_HASH_ALG_ADLER32] = CFS_HASH_ALG_ADLER32, | |
785 | [BULK_HASH_ALG_CRC32] = CFS_HASH_ALG_CRC32, | |
786 | [BULK_HASH_ALG_MD5] = CFS_HASH_ALG_MD5, | |
787 | [BULK_HASH_ALG_SHA1] = CFS_HASH_ALG_SHA1, | |
788 | [BULK_HASH_ALG_SHA256] = CFS_HASH_ALG_SHA256, | |
789 | [BULK_HASH_ALG_SHA384] = CFS_HASH_ALG_SHA384, | |
790 | [BULK_HASH_ALG_SHA512] = CFS_HASH_ALG_SHA512, | |
791 | }; | |
aff9d8e8 | 792 | const char *sptlrpc_get_hash_name(__u8 hash_alg) |
d7e09d03 PT |
793 | { |
794 | return cfs_crypto_hash_name(cfs_hash_alg_id[hash_alg]); | |
795 | } | |
796 | EXPORT_SYMBOL(sptlrpc_get_hash_name); | |
797 | ||
798 | __u8 sptlrpc_get_hash_alg(const char *algname) | |
799 | { | |
800 | return cfs_crypto_hash_alg(algname); | |
801 | } | |
802 | EXPORT_SYMBOL(sptlrpc_get_hash_alg); | |
803 | ||
804 | int bulk_sec_desc_unpack(struct lustre_msg *msg, int offset, int swabbed) | |
805 | { | |
806 | struct ptlrpc_bulk_sec_desc *bsd; | |
807 | int size = msg->lm_buflens[offset]; | |
808 | ||
809 | bsd = lustre_msg_buf(msg, offset, sizeof(*bsd)); | |
810 | if (bsd == NULL) { | |
811 | CERROR("Invalid bulk sec desc: size %d\n", size); | |
812 | return -EINVAL; | |
813 | } | |
814 | ||
acf60c3d | 815 | if (swabbed) |
d7e09d03 | 816 | __swab32s(&bsd->bsd_nob); |
d7e09d03 PT |
817 | |
818 | if (unlikely(bsd->bsd_version != 0)) { | |
819 | CERROR("Unexpected version %u\n", bsd->bsd_version); | |
820 | return -EPROTO; | |
821 | } | |
822 | ||
823 | if (unlikely(bsd->bsd_type >= SPTLRPC_BULK_MAX)) { | |
824 | CERROR("Invalid type %u\n", bsd->bsd_type); | |
825 | return -EPROTO; | |
826 | } | |
827 | ||
828 | /* FIXME more sanity check here */ | |
829 | ||
830 | if (unlikely(bsd->bsd_svc != SPTLRPC_BULK_SVC_NULL && | |
831 | bsd->bsd_svc != SPTLRPC_BULK_SVC_INTG && | |
832 | bsd->bsd_svc != SPTLRPC_BULK_SVC_PRIV)) { | |
833 | CERROR("Invalid svc %u\n", bsd->bsd_svc); | |
834 | return -EPROTO; | |
835 | } | |
836 | ||
837 | return 0; | |
838 | } | |
839 | EXPORT_SYMBOL(bulk_sec_desc_unpack); | |
840 | ||
841 | int sptlrpc_get_bulk_checksum(struct ptlrpc_bulk_desc *desc, __u8 alg, | |
842 | void *buf, int buflen) | |
843 | { | |
d0bfef31 CH |
844 | struct cfs_crypto_hash_desc *hdesc; |
845 | int hashsize; | |
846 | char hashbuf[64]; | |
847 | unsigned int bufsize; | |
848 | int i, err; | |
d7e09d03 PT |
849 | |
850 | LASSERT(alg > BULK_HASH_ALG_NULL && alg < BULK_HASH_ALG_MAX); | |
851 | LASSERT(buflen >= 4); | |
852 | ||
853 | hdesc = cfs_crypto_hash_init(cfs_hash_alg_id[alg], NULL, 0); | |
854 | if (IS_ERR(hdesc)) { | |
855 | CERROR("Unable to initialize checksum hash %s\n", | |
856 | cfs_crypto_hash_name(cfs_hash_alg_id[alg])); | |
857 | return PTR_ERR(hdesc); | |
858 | } | |
859 | ||
860 | hashsize = cfs_crypto_hash_digestsize(cfs_hash_alg_id[alg]); | |
861 | ||
862 | for (i = 0; i < desc->bd_iov_count; i++) { | |
863 | cfs_crypto_hash_update_page(hdesc, desc->bd_iov[i].kiov_page, | |
864 | desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK, | |
865 | desc->bd_iov[i].kiov_len); | |
866 | } | |
867 | if (hashsize > buflen) { | |
868 | bufsize = sizeof(hashbuf); | |
869 | err = cfs_crypto_hash_final(hdesc, (unsigned char *)hashbuf, | |
870 | &bufsize); | |
871 | memcpy(buf, hashbuf, buflen); | |
872 | } else { | |
873 | bufsize = buflen; | |
874 | err = cfs_crypto_hash_final(hdesc, (unsigned char *)buf, | |
875 | &bufsize); | |
876 | } | |
877 | ||
878 | if (err) | |
879 | cfs_crypto_hash_final(hdesc, NULL, NULL); | |
880 | return err; | |
881 | } | |
882 | EXPORT_SYMBOL(sptlrpc_get_bulk_checksum); |