fs: out of bounds on stack in iov_iter_advance
[linux-block.git] / fs / orangefs / pvfs2-bufmap.c
CommitLineData
274dcf55
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6#include "protocol.h"
7#include "pvfs2-kernel.h"
8#include "pvfs2-bufmap.h"
9
/*
 * Wait queue that pvfs_bufmap_initialize() wakes once a new bufmap has
 * been installed.
 */
DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
11
84d02150 12static struct pvfs2_bufmap {
274dcf55
MM
13 atomic_t refcnt;
14
15 int desc_size;
16 int desc_shift;
17 int desc_count;
18 int total_size;
19 int page_count;
20
21 struct page **page_array;
22 struct pvfs_bufmap_desc *desc_array;
23
24 /* array to track usage of buffer descriptors */
25 int *buffer_index_array;
26 spinlock_t buffer_index_lock;
27
28 /* array to track usage of buffer descriptors for readdir */
29 int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
30 spinlock_t readdir_index_lock;
31} *__pvfs2_bufmap;
32
/* Protects the __pvfs2_bufmap pointer and the taking of new references. */
static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
34
35static void
36pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
37{
38 int i;
39
40 for (i = 0; i < bufmap->page_count; i++)
41 page_cache_release(bufmap->page_array[i]);
42}
43
44static void
45pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
46{
47 kfree(bufmap->page_array);
48 kfree(bufmap->desc_array);
49 kfree(bufmap->buffer_index_array);
50 kfree(bufmap);
51}
52
53struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
54{
55 struct pvfs2_bufmap *bufmap = NULL;
56
57 spin_lock(&pvfs2_bufmap_lock);
58 if (__pvfs2_bufmap) {
59 bufmap = __pvfs2_bufmap;
60 atomic_inc(&bufmap->refcnt);
61 }
62 spin_unlock(&pvfs2_bufmap_lock);
63 return bufmap;
64}
65
66void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
67{
68 if (atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock)) {
69 __pvfs2_bufmap = NULL;
70 spin_unlock(&pvfs2_bufmap_lock);
71
72 pvfs2_bufmap_unmap(bufmap);
73 pvfs2_bufmap_free(bufmap);
74 }
75}
76
77inline int pvfs_bufmap_size_query(void)
78{
79 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
80 int size = bufmap ? bufmap->desc_size : 0;
81
82 pvfs2_bufmap_unref(bufmap);
83 return size;
84}
85
86inline int pvfs_bufmap_shift_query(void)
87{
88 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
89 int shift = bufmap ? bufmap->desc_shift : 0;
90
91 pvfs2_bufmap_unref(bufmap);
92 return shift;
93}
94
/* Tasks waiting for a free I/O buffer descriptor sleep here. */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* Tasks waiting for a free readdir descriptor sleep here. */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
97
98/*
99 * get_bufmap_init
100 *
101 * If bufmap_init is 1, then the shared memory system, including the
102 * buffer_index_array, is available. Otherwise, it is not.
103 *
104 * returns the value of bufmap_init
105 */
106int get_bufmap_init(void)
107{
108 return __pvfs2_bufmap ? 1 : 0;
109}
110
111
112static struct pvfs2_bufmap *
113pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
114{
115 struct pvfs2_bufmap *bufmap;
116
117 bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
118 if (!bufmap)
119 goto out;
120
121 atomic_set(&bufmap->refcnt, 1);
122 bufmap->total_size = user_desc->total_size;
123 bufmap->desc_count = user_desc->count;
124 bufmap->desc_size = user_desc->size;
125 bufmap->desc_shift = ilog2(bufmap->desc_size);
126
127 spin_lock_init(&bufmap->buffer_index_lock);
128 bufmap->buffer_index_array =
129 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
130 if (!bufmap->buffer_index_array) {
131 gossip_err("pvfs2: could not allocate %d buffer indices\n",
132 bufmap->desc_count);
133 goto out_free_bufmap;
134 }
135 spin_lock_init(&bufmap->readdir_index_lock);
136
137 bufmap->desc_array =
138 kcalloc(bufmap->desc_count, sizeof(struct pvfs_bufmap_desc),
139 GFP_KERNEL);
140 if (!bufmap->desc_array) {
141 gossip_err("pvfs2: could not allocate %d descriptors\n",
142 bufmap->desc_count);
143 goto out_free_index_array;
144 }
145
146 bufmap->page_count = bufmap->total_size / PAGE_SIZE;
147
148 /* allocate storage to track our page mappings */
149 bufmap->page_array =
150 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
151 if (!bufmap->page_array)
152 goto out_free_desc_array;
153
154 return bufmap;
155
156out_free_desc_array:
157 kfree(bufmap->desc_array);
158out_free_index_array:
159 kfree(bufmap->buffer_index_array);
160out_free_bufmap:
161 kfree(bufmap);
162out:
163 return NULL;
164}
165
166static int
167pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
168 struct PVFS_dev_map_desc *user_desc)
169{
170 int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
171 int offset = 0, ret, i;
172
173 /* map the pages */
16742f2d
AV
174 ret = get_user_pages_fast((unsigned long)user_desc->ptr,
175 bufmap->page_count, 1, bufmap->page_array);
274dcf55
MM
176
177 if (ret < 0)
178 return ret;
179
180 if (ret != bufmap->page_count) {
181 gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
182 bufmap->page_count, ret);
183
184 for (i = 0; i < ret; i++) {
185 SetPageError(bufmap->page_array[i]);
186 page_cache_release(bufmap->page_array[i]);
187 }
188 return -ENOMEM;
189 }
190
191 /*
192 * ideally we want to get kernel space pointers for each page, but
193 * we can't kmap that many pages at once if highmem is being used.
194 * so instead, we just kmap/kunmap the page address each time the
195 * kaddr is needed.
196 */
197 for (i = 0; i < bufmap->page_count; i++)
198 flush_dcache_page(bufmap->page_array[i]);
199
200 /* build a list of available descriptors */
201 for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
202 bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
203 bufmap->desc_array[i].array_count = pages_per_desc;
204 bufmap->desc_array[i].uaddr =
205 (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
206 offset += pages_per_desc;
207 }
208
209 return 0;
210}
211
212/*
213 * pvfs_bufmap_initialize()
214 *
215 * initializes the mapped buffer interface
216 *
217 * returns 0 on success, -errno on failure
218 */
219int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
220{
221 struct pvfs2_bufmap *bufmap;
222 int ret = -EINVAL;
223
224 gossip_debug(GOSSIP_BUFMAP_DEBUG,
225 "pvfs_bufmap_initialize: called (ptr ("
226 "%p) sz (%d) cnt(%d).\n",
227 user_desc->ptr,
228 user_desc->size,
229 user_desc->count);
230
231 /*
232 * sanity check alignment and size of buffer that caller wants to
233 * work with
234 */
235 if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
236 (unsigned long)user_desc->ptr) {
237 gossip_err("pvfs2 error: memory alignment (front). %p\n",
238 user_desc->ptr);
239 goto out;
240 }
241
242 if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
243 != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
244 gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
245 user_desc->ptr,
246 user_desc->total_size);
247 goto out;
248 }
249
250 if (user_desc->total_size != (user_desc->size * user_desc->count)) {
251 gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
252 user_desc->total_size,
253 user_desc->size,
254 user_desc->count);
255 goto out;
256 }
257
258 if ((user_desc->size % PAGE_SIZE) != 0) {
259 gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
260 user_desc->size);
261 goto out;
262 }
263
264 ret = -ENOMEM;
265 bufmap = pvfs2_bufmap_alloc(user_desc);
266 if (!bufmap)
267 goto out;
268
269 ret = pvfs2_bufmap_map(bufmap, user_desc);
270 if (ret)
271 goto out_free_bufmap;
272
273
274 spin_lock(&pvfs2_bufmap_lock);
275 if (__pvfs2_bufmap) {
276 spin_unlock(&pvfs2_bufmap_lock);
277 gossip_err("pvfs2: error: bufmap already initialized.\n");
278 ret = -EALREADY;
279 goto out_unmap_bufmap;
280 }
281 __pvfs2_bufmap = bufmap;
282 spin_unlock(&pvfs2_bufmap_lock);
283
284 /*
285 * If there are operations in pvfs2_bufmap_init_waitq, wake them up.
286 * This scenario occurs when the client-core is restarted and I/O
287 * requests in the in-progress or waiting tables are restarted. I/O
288 * requests cannot be restarted until the shared memory system is
289 * completely re-initialized, so we put the I/O requests in this
290 * waitq until initialization has completed. NOTE: the I/O requests
291 * are also on a timer, so they don't wait forever just in case the
292 * client-core doesn't come back up.
293 */
294 wake_up_interruptible(&pvfs2_bufmap_init_waitq);
295
296 gossip_debug(GOSSIP_BUFMAP_DEBUG,
297 "pvfs_bufmap_initialize: exiting normally\n");
298 return 0;
299
300out_unmap_bufmap:
301 pvfs2_bufmap_unmap(bufmap);
302out_free_bufmap:
303 pvfs2_bufmap_free(bufmap);
304out:
305 return ret;
306}
307
308/*
309 * pvfs_bufmap_finalize()
310 *
311 * shuts down the mapped buffer interface and releases any resources
312 * associated with it
313 *
314 * no return value
315 */
316void pvfs_bufmap_finalize(void)
317{
318 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
319 BUG_ON(!__pvfs2_bufmap);
320 pvfs2_bufmap_unref(__pvfs2_bufmap);
321 gossip_debug(GOSSIP_BUFMAP_DEBUG,
322 "pvfs2_bufmap_finalize: exiting normally\n");
323}
324
325struct slot_args {
326 int slot_count;
327 int *slot_array;
328 spinlock_t *slot_lock;
329 wait_queue_head_t *slot_wq;
330};
331
332static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
333{
334 int ret = -1;
335 int i = 0;
336 DECLARE_WAITQUEUE(my_wait, current);
337
338
339 add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
340
341 while (1) {
342 set_current_state(TASK_INTERRUPTIBLE);
343
344 /*
345 * check for available desc, slot_lock is the appropriate
346 * index_lock
347 */
348 spin_lock(slargs->slot_lock);
349 for (i = 0; i < slargs->slot_count; i++)
350 if (slargs->slot_array[i] == 0) {
351 slargs->slot_array[i] = 1;
352 *buffer_index = i;
353 ret = 0;
354 break;
355 }
356 spin_unlock(slargs->slot_lock);
357
358 /* if we acquired a buffer, then break out of while */
359 if (ret == 0)
360 break;
361
362 if (!signal_pending(current)) {
363 int timeout =
364 MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
365 gossip_debug(GOSSIP_BUFMAP_DEBUG,
366 "[BUFMAP]: waiting %d "
367 "seconds for a slot\n",
368 slot_timeout_secs);
369 if (!schedule_timeout(timeout)) {
370 gossip_debug(GOSSIP_BUFMAP_DEBUG,
371 "*** wait_for_a_slot timed out\n");
372 ret = -ETIMEDOUT;
373 break;
374 }
375 gossip_debug(GOSSIP_BUFMAP_DEBUG,
376 "[BUFMAP]: woken up by a slot becoming available.\n");
377 continue;
378 }
379
380 gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
381 __func__);
382 ret = -EINTR;
383 break;
384 }
385
386 set_current_state(TASK_RUNNING);
387 remove_wait_queue(slargs->slot_wq, &my_wait);
388 return ret;
389}
390
391static void put_back_slot(struct slot_args *slargs, int buffer_index)
392{
393 /* slot_lock is the appropriate index_lock */
394 spin_lock(slargs->slot_lock);
395 if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
396 spin_unlock(slargs->slot_lock);
397 return;
398 }
399
400 /* put the desc back on the queue */
401 slargs->slot_array[buffer_index] = 0;
402 spin_unlock(slargs->slot_lock);
403
404 /* wake up anyone who may be sleeping on the queue */
405 wake_up_interruptible(slargs->slot_wq);
406}
407
408/*
409 * pvfs_bufmap_get()
410 *
411 * gets a free mapped buffer descriptor, will sleep until one becomes
412 * available if necessary
413 *
414 * returns 0 on success, -errno on failure
415 */
416int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
417{
418 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
419 struct slot_args slargs;
420 int ret;
421
422 if (!bufmap) {
423 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
424 return -EIO;
425 }
426
427 slargs.slot_count = bufmap->desc_count;
428 slargs.slot_array = bufmap->buffer_index_array;
429 slargs.slot_lock = &bufmap->buffer_index_lock;
430 slargs.slot_wq = &bufmap_waitq;
431 ret = wait_for_a_slot(&slargs, buffer_index);
432 if (ret)
433 pvfs2_bufmap_unref(bufmap);
434 *mapp = bufmap;
435 return ret;
436}
437
438/*
439 * pvfs_bufmap_put()
440 *
441 * returns a mapped buffer descriptor to the collection
442 *
443 * no return value
444 */
445void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
446{
447 struct slot_args slargs;
448
449 slargs.slot_count = bufmap->desc_count;
450 slargs.slot_array = bufmap->buffer_index_array;
451 slargs.slot_lock = &bufmap->buffer_index_lock;
452 slargs.slot_wq = &bufmap_waitq;
453 put_back_slot(&slargs, buffer_index);
454 pvfs2_bufmap_unref(bufmap);
455}
456
457/*
458 * readdir_index_get()
459 *
460 * gets a free descriptor, will sleep until one becomes
461 * available if necessary.
462 * Although the readdir buffers are not mapped into kernel space
463 * we could do that at a later point of time. Regardless, these
464 * indices are used by the client-core.
465 *
466 * returns 0 on success, -errno on failure
467 */
468int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
469{
470 struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
471 struct slot_args slargs;
472 int ret;
473
474 if (!bufmap) {
475 gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
476 return -EIO;
477 }
478
479 slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
480 slargs.slot_array = bufmap->readdir_index_array;
481 slargs.slot_lock = &bufmap->readdir_index_lock;
482 slargs.slot_wq = &readdir_waitq;
483 ret = wait_for_a_slot(&slargs, buffer_index);
484 if (ret)
485 pvfs2_bufmap_unref(bufmap);
486 *mapp = bufmap;
487 return ret;
488}
489
490void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
491{
492 struct slot_args slargs;
493
494 slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
495 slargs.slot_array = bufmap->readdir_index_array;
496 slargs.slot_lock = &bufmap->readdir_index_lock;
497 slargs.slot_wq = &readdir_waitq;
498 put_back_slot(&slargs, buffer_index);
499 pvfs2_bufmap_unref(bufmap);
500}
501
4d1c4404 502int pvfs_bufmap_copy_from_iovec(struct pvfs2_bufmap *bufmap,
54804949
MM
503 struct iov_iter *iter,
504 int buffer_index,
505 size_t size)
274dcf55 506{
34204fde 507 struct pvfs_bufmap_desc *to = &bufmap->desc_array[buffer_index];
4d1c4404 508 int i;
274dcf55
MM
509
510 gossip_debug(GOSSIP_BUFMAP_DEBUG,
34204fde 511 "%s: buffer_index:%d: size:%zu:\n",
4d1c4404 512 __func__, buffer_index, size);
274dcf55 513
274dcf55 514
4d1c4404 515 for (i = 0; size; i++) {
34204fde
AV
516 struct page *page = to->page_array[i];
517 size_t n = size;
518 if (n > PAGE_SIZE)
519 n = PAGE_SIZE;
520 n = copy_page_from_iter(page, 0, n, iter);
521 if (!n)
522 return -EFAULT;
523 size -= n;
274dcf55 524 }
34204fde 525 return 0;
274dcf55 526
274dcf55
MM
527}
528
529/*
4d1c4404
MM
530 * Iterate through the array of pages containing the bytes from
531 * a file being read.
274dcf55 532 *
274dcf55 533 */
4d1c4404
MM
534int pvfs_bufmap_copy_to_iovec(struct pvfs2_bufmap *bufmap,
535 struct iov_iter *iter,
5c278228
AV
536 int buffer_index,
537 size_t size)
274dcf55 538{
5c278228 539 struct pvfs_bufmap_desc *from = &bufmap->desc_array[buffer_index];
4d1c4404 540 int i;
274dcf55
MM
541
542 gossip_debug(GOSSIP_BUFMAP_DEBUG,
5c278228
AV
543 "%s: buffer_index:%d: size:%zu:\n",
544 __func__, buffer_index, size);
274dcf55 545
274dcf55 546
5c278228
AV
547 for (i = 0; size; i++) {
548 struct page *page = from->page_array[i];
549 size_t n = size;
550 if (n > PAGE_SIZE)
551 n = PAGE_SIZE;
552 n = copy_page_to_iter(page, 0, n, iter);
553 if (!n)
554 return -EFAULT;
555 size -= n;
274dcf55 556 }
5c278228 557 return 0;
274dcf55 558}