orangefs: Change visibility of several bufmap helpers to static.
[linux-2.6-block.git] / fs / orangefs / orangefs-bufmap.c
CommitLineData
274dcf55
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6#include "protocol.h"
575e9461
MM
7#include "orangefs-kernel.h"
8#include "orangefs-bufmap.h"
274dcf55 9
/* woken by orangefs_bufmap_initialize() once the shared memory system is up */
DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq);
274dcf55 11
/* used to describe mapped buffers */
struct orangefs_bufmap_desc {
	void *uaddr;			/* user space address pointer */
	struct page **page_array;	/* array of mapped pages */
	int array_count;		/* size of above arrays */
	/* NOTE(review): list_link is not obviously used in this file — verify */
	struct list_head list_link;
};
19
static struct orangefs_bufmap {
	atomic_t refcnt;	/* last put unmaps and frees the bufmap */

	int desc_size;		/* bytes per descriptor buffer (user_desc->size) */
	int desc_shift;		/* ilog2(desc_size) */
	int desc_count;		/* number of buffer descriptors */
	int total_size;		/* desc_size * desc_count, validated at init */
	int page_count;		/* total_size / PAGE_SIZE */

	struct page **page_array;	/* pinned user pages backing the buffers */
	struct orangefs_bufmap_desc *desc_array;

	/* array to track usage of buffer descriptors */
	int *buffer_index_array;
	spinlock_t buffer_index_lock;

	/* array to track usage of buffer descriptors for readdir */
	int readdir_index_array[ORANGEFS_READDIR_DEFAULT_DESC_COUNT];
	spinlock_t readdir_index_lock;
} *__orangefs_bufmap;

/* protects installation/teardown of __orangefs_bufmap */
static DEFINE_SPINLOCK(orangefs_bufmap_lock);
274dcf55
MM
42
43static void
8bb8aefd 44orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
274dcf55
MM
45{
46 int i;
47
48 for (i = 0; i < bufmap->page_count; i++)
49 page_cache_release(bufmap->page_array[i]);
50}
51
52static void
8bb8aefd 53orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
274dcf55
MM
54{
55 kfree(bufmap->page_array);
56 kfree(bufmap->desc_array);
57 kfree(bufmap->buffer_index_array);
58 kfree(bufmap);
59}
60
bf89f584 61static struct orangefs_bufmap *orangefs_bufmap_ref(void)
274dcf55 62{
8bb8aefd 63 struct orangefs_bufmap *bufmap = NULL;
274dcf55 64
8bb8aefd
YL
65 spin_lock(&orangefs_bufmap_lock);
66 if (__orangefs_bufmap) {
67 bufmap = __orangefs_bufmap;
274dcf55
MM
68 atomic_inc(&bufmap->refcnt);
69 }
8bb8aefd 70 spin_unlock(&orangefs_bufmap_lock);
274dcf55
MM
71 return bufmap;
72}
73
/*
 * Drop a reference on @bufmap.  The final put clears the global pointer
 * and releases the pinned pages plus all allocations.
 */
static void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap)
{
	/*
	 * atomic_dec_and_lock() only takes orangefs_bufmap_lock when the
	 * count reaches zero, so clearing __orangefs_bufmap is atomic with
	 * the last put.
	 */
	if (atomic_dec_and_lock(&bufmap->refcnt, &orangefs_bufmap_lock)) {
		__orangefs_bufmap = NULL;
		spin_unlock(&orangefs_bufmap_lock);

		orangefs_bufmap_unmap(bufmap);
		orangefs_bufmap_free(bufmap);
	}
}
84
8bb8aefd 85inline int orangefs_bufmap_size_query(void)
274dcf55 86{
8bb8aefd 87 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
88 int size = bufmap ? bufmap->desc_size : 0;
89
8bb8aefd 90 orangefs_bufmap_unref(bufmap);
274dcf55
MM
91 return size;
92}
93
8bb8aefd 94inline int orangefs_bufmap_shift_query(void)
274dcf55 95{
8bb8aefd 96 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
97 int shift = bufmap ? bufmap->desc_shift : 0;
98
8bb8aefd 99 orangefs_bufmap_unref(bufmap);
274dcf55
MM
100 return shift;
101}
102
/* waiters for a free regular (I/O) buffer slot */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* waiters for a free readdir buffer slot */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
105
106/*
107 * get_bufmap_init
108 *
109 * If bufmap_init is 1, then the shared memory system, including the
110 * buffer_index_array, is available. Otherwise, it is not.
111 *
112 * returns the value of bufmap_init
113 */
114int get_bufmap_init(void)
115{
8bb8aefd 116 return __orangefs_bufmap ? 1 : 0;
274dcf55
MM
117}
118
119
8bb8aefd
YL
120static struct orangefs_bufmap *
121orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
274dcf55 122{
8bb8aefd 123 struct orangefs_bufmap *bufmap;
274dcf55
MM
124
125 bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
126 if (!bufmap)
127 goto out;
128
129 atomic_set(&bufmap->refcnt, 1);
130 bufmap->total_size = user_desc->total_size;
131 bufmap->desc_count = user_desc->count;
132 bufmap->desc_size = user_desc->size;
133 bufmap->desc_shift = ilog2(bufmap->desc_size);
134
135 spin_lock_init(&bufmap->buffer_index_lock);
136 bufmap->buffer_index_array =
137 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
138 if (!bufmap->buffer_index_array) {
8bb8aefd 139 gossip_err("orangefs: could not allocate %d buffer indices\n",
274dcf55
MM
140 bufmap->desc_count);
141 goto out_free_bufmap;
142 }
143 spin_lock_init(&bufmap->readdir_index_lock);
144
145 bufmap->desc_array =
8bb8aefd 146 kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
274dcf55
MM
147 GFP_KERNEL);
148 if (!bufmap->desc_array) {
8bb8aefd 149 gossip_err("orangefs: could not allocate %d descriptors\n",
274dcf55
MM
150 bufmap->desc_count);
151 goto out_free_index_array;
152 }
153
154 bufmap->page_count = bufmap->total_size / PAGE_SIZE;
155
156 /* allocate storage to track our page mappings */
157 bufmap->page_array =
158 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
159 if (!bufmap->page_array)
160 goto out_free_desc_array;
161
162 return bufmap;
163
164out_free_desc_array:
165 kfree(bufmap->desc_array);
166out_free_index_array:
167 kfree(bufmap->buffer_index_array);
168out_free_bufmap:
169 kfree(bufmap);
170out:
171 return NULL;
172}
173
/*
 * Pin the user-space buffer region into bufmap->page_array and carve it
 * into per-descriptor page ranges.
 *
 * Returns 0 on success, a negative errno on failure (including -ENOMEM
 * when fewer pages than requested could be pinned).
 */
static int
orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
		struct ORANGEFS_dev_map_desc *user_desc)
{
	int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
	int offset = 0, ret, i;

	/* map the pages */
	ret = get_user_pages_fast((unsigned long)user_desc->ptr,
			bufmap->page_count, 1, bufmap->page_array);

	if (ret < 0)
		return ret;

	/* partial pin: flag and release what we got, then fail */
	if (ret != bufmap->page_count) {
		gossip_err("orangefs error: asked for %d pages, only got %d.\n",
				bufmap->page_count, ret);

		for (i = 0; i < ret; i++) {
			SetPageError(bufmap->page_array[i]);
			page_cache_release(bufmap->page_array[i]);
		}
		return -ENOMEM;
	}

	/*
	 * ideally we want to get kernel space pointers for each page, but
	 * we can't kmap that many pages at once if highmem is being used.
	 * so instead, we just kmap/kunmap the page address each time the
	 * kaddr is needed.
	 */
	for (i = 0; i < bufmap->page_count; i++)
		flush_dcache_page(bufmap->page_array[i]);

	/* build a list of available descriptors */
	for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
		bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
		bufmap->desc_array[i].array_count = pages_per_desc;
		bufmap->desc_array[i].uaddr =
		    (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
		offset += pages_per_desc;
	}

	return 0;
}
219
/*
 * orangefs_bufmap_initialize()
 *
 * initializes the mapped buffer interface
 *
 * Validates the user-supplied region (page alignment at both ends,
 * total_size == size * count, size a multiple of PAGE_SIZE), allocates
 * and pins the bufmap, then installs it as the global __orangefs_bufmap.
 *
 * returns 0 on success, -errno on failure (-EALREADY if a bufmap is
 * already installed)
 */
int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
{
	struct orangefs_bufmap *bufmap;
	int ret = -EINVAL;

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_initialize: called (ptr ("
		     "%p) sz (%d) cnt(%d).\n",
		     user_desc->ptr,
		     user_desc->size,
		     user_desc->count);

	/*
	 * sanity check alignment and size of buffer that caller wants to
	 * work with
	 */
	if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
	    (unsigned long)user_desc->ptr) {
		gossip_err("orangefs error: memory alignment (front). %p\n",
			   user_desc->ptr);
		goto out;
	}

	if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
	    != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
		gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
			   user_desc->ptr,
			   user_desc->total_size);
		goto out;
	}

	if (user_desc->total_size != (user_desc->size * user_desc->count)) {
		gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
			   user_desc->total_size,
			   user_desc->size,
			   user_desc->count);
		goto out;
	}

	if ((user_desc->size % PAGE_SIZE) != 0) {
		gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
			   user_desc->size);
		goto out;
	}

	ret = -ENOMEM;
	bufmap = orangefs_bufmap_alloc(user_desc);
	if (!bufmap)
		goto out;

	ret = orangefs_bufmap_map(bufmap, user_desc);
	if (ret)
		goto out_free_bufmap;

	/* publish the bufmap unless someone beat us to it */
	spin_lock(&orangefs_bufmap_lock);
	if (__orangefs_bufmap) {
		spin_unlock(&orangefs_bufmap_lock);
		gossip_err("orangefs: error: bufmap already initialized.\n");
		ret = -EALREADY;
		goto out_unmap_bufmap;
	}
	__orangefs_bufmap = bufmap;
	spin_unlock(&orangefs_bufmap_lock);

	/*
	 * If there are operations in orangefs_bufmap_init_waitq, wake them up.
	 * This scenario occurs when the client-core is restarted and I/O
	 * requests in the in-progress or waiting tables are restarted. I/O
	 * requests cannot be restarted until the shared memory system is
	 * completely re-initialized, so we put the I/O requests in this
	 * waitq until initialization has completed. NOTE: the I/O requests
	 * are also on a timer, so they don't wait forever just in case the
	 * client-core doesn't come back up.
	 */
	wake_up_interruptible(&orangefs_bufmap_init_waitq);

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "orangefs_bufmap_initialize: exiting normally\n");
	return 0;

out_unmap_bufmap:
	orangefs_bufmap_unmap(bufmap);
out_free_bufmap:
	orangefs_bufmap_free(bufmap);
out:
	return ret;
}
315
316/*
8bb8aefd 317 * orangefs_bufmap_finalize()
274dcf55
MM
318 *
319 * shuts down the mapped buffer interface and releases any resources
320 * associated with it
321 *
322 * no return value
323 */
8bb8aefd 324void orangefs_bufmap_finalize(void)
274dcf55 325{
8bb8aefd
YL
326 gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
327 BUG_ON(!__orangefs_bufmap);
328 orangefs_bufmap_unref(__orangefs_bufmap);
274dcf55 329 gossip_debug(GOSSIP_BUFMAP_DEBUG,
8bb8aefd 330 "orangefs_bufmap_finalize: exiting normally\n");
274dcf55
MM
331}
332
/* bundles one slot pool (I/O or readdir) for wait_for_a_slot()/put_back_slot() */
struct slot_args {
	int slot_count;			/* number of entries in slot_array */
	int *slot_array;		/* 0 = free, 1 = in use */
	spinlock_t *slot_lock;		/* protects slot_array */
	wait_queue_head_t *slot_wq;	/* waiters for a free slot */
};
339
/*
 * Claim a free slot from @slargs, sleeping (interruptibly, with a
 * timeout of slot_timeout_secs) until one becomes available.
 *
 * Returns 0 with *buffer_index set on success, -ETIMEDOUT on timeout,
 * -EINTR if interrupted by a signal.
 */
static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
{
	int ret = -1;
	int i = 0;
	DEFINE_WAIT(wait_entry);

	while (1) {
		/*
		 * check for available desc, slot_lock is the appropriate
		 * index_lock
		 */
		spin_lock(slargs->slot_lock);
		/*
		 * Queue ourselves before scanning so a concurrent
		 * put_back_slot() cannot free a slot and wake the queue
		 * between our scan and our sleep.
		 */
		prepare_to_wait_exclusive(slargs->slot_wq,
					  &wait_entry,
					  TASK_INTERRUPTIBLE);
		for (i = 0; i < slargs->slot_count; i++)
			if (slargs->slot_array[i] == 0) {
				slargs->slot_array[i] = 1;
				*buffer_index = i;
				ret = 0;
				break;
			}
		spin_unlock(slargs->slot_lock);

		/* if we acquired a buffer, then break out of while */
		if (ret == 0)
			break;

		if (!signal_pending(current)) {
			int timeout =
			    MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
			gossip_debug(GOSSIP_BUFMAP_DEBUG,
				     "[BUFMAP]: waiting %d "
				     "seconds for a slot\n",
				     slot_timeout_secs);
			/* schedule_timeout() returning 0 means we timed out */
			if (!schedule_timeout(timeout)) {
				gossip_debug(GOSSIP_BUFMAP_DEBUG,
					     "*** wait_for_a_slot timed out\n");
				ret = -ETIMEDOUT;
				break;
			}
			gossip_debug(GOSSIP_BUFMAP_DEBUG,
				     "[BUFMAP]: woken up by a slot becoming available.\n");
			continue;
		}

		gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs: %s interrupted.\n",
			     __func__);
		ret = -EINTR;
		break;
	}

	/* dequeue ourselves under the same lock used for the queueing */
	spin_lock(slargs->slot_lock);
	finish_wait(slargs->slot_wq, &wait_entry);
	spin_unlock(slargs->slot_lock);
	return ret;
}
397
398static void put_back_slot(struct slot_args *slargs, int buffer_index)
399{
400 /* slot_lock is the appropriate index_lock */
401 spin_lock(slargs->slot_lock);
402 if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
403 spin_unlock(slargs->slot_lock);
404 return;
405 }
406
407 /* put the desc back on the queue */
408 slargs->slot_array[buffer_index] = 0;
409 spin_unlock(slargs->slot_lock);
410
411 /* wake up anyone who may be sleeping on the queue */
412 wake_up_interruptible(slargs->slot_wq);
413}
414
415/*
8bb8aefd 416 * orangefs_bufmap_get()
274dcf55
MM
417 *
418 * gets a free mapped buffer descriptor, will sleep until one becomes
419 * available if necessary
420 *
421 * returns 0 on success, -errno on failure
422 */
8bb8aefd 423int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index)
274dcf55 424{
8bb8aefd 425 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
426 struct slot_args slargs;
427 int ret;
428
429 if (!bufmap) {
8bb8aefd 430 gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n");
274dcf55
MM
431 return -EIO;
432 }
433
434 slargs.slot_count = bufmap->desc_count;
435 slargs.slot_array = bufmap->buffer_index_array;
436 slargs.slot_lock = &bufmap->buffer_index_lock;
437 slargs.slot_wq = &bufmap_waitq;
438 ret = wait_for_a_slot(&slargs, buffer_index);
439 if (ret)
8bb8aefd 440 orangefs_bufmap_unref(bufmap);
274dcf55
MM
441 *mapp = bufmap;
442 return ret;
443}
444
445/*
8bb8aefd 446 * orangefs_bufmap_put()
274dcf55
MM
447 *
448 * returns a mapped buffer descriptor to the collection
449 *
450 * no return value
451 */
8bb8aefd 452void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index)
274dcf55
MM
453{
454 struct slot_args slargs;
455
456 slargs.slot_count = bufmap->desc_count;
457 slargs.slot_array = bufmap->buffer_index_array;
458 slargs.slot_lock = &bufmap->buffer_index_lock;
459 slargs.slot_wq = &bufmap_waitq;
460 put_back_slot(&slargs, buffer_index);
8bb8aefd 461 orangefs_bufmap_unref(bufmap);
274dcf55
MM
462}
463
464/*
465 * readdir_index_get()
466 *
467 * gets a free descriptor, will sleep until one becomes
468 * available if necessary.
469 * Although the readdir buffers are not mapped into kernel space
470 * we could do that at a later point of time. Regardless, these
471 * indices are used by the client-core.
472 *
473 * returns 0 on success, -errno on failure
474 */
8bb8aefd 475int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index)
274dcf55 476{
8bb8aefd 477 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
478 struct slot_args slargs;
479 int ret;
480
481 if (!bufmap) {
8bb8aefd 482 gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n");
274dcf55
MM
483 return -EIO;
484 }
485
8bb8aefd 486 slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT;
274dcf55
MM
487 slargs.slot_array = bufmap->readdir_index_array;
488 slargs.slot_lock = &bufmap->readdir_index_lock;
489 slargs.slot_wq = &readdir_waitq;
490 ret = wait_for_a_slot(&slargs, buffer_index);
491 if (ret)
8bb8aefd 492 orangefs_bufmap_unref(bufmap);
274dcf55
MM
493 *mapp = bufmap;
494 return ret;
495}
496
8bb8aefd 497void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index)
274dcf55
MM
498{
499 struct slot_args slargs;
500
8bb8aefd 501 slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT;
274dcf55
MM
502 slargs.slot_array = bufmap->readdir_index_array;
503 slargs.slot_lock = &bufmap->readdir_index_lock;
504 slargs.slot_wq = &readdir_waitq;
505 put_back_slot(&slargs, buffer_index);
8bb8aefd 506 orangefs_bufmap_unref(bufmap);
274dcf55
MM
507}
508
b5e376ea
MM
509/*
510 * we've been handed an iovec, we need to copy it to
511 * the shared memory descriptor at "buffer_index".
512 */
8bb8aefd 513int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap,
54804949
MM
514 struct iov_iter *iter,
515 int buffer_index,
516 size_t size)
274dcf55 517{
8bb8aefd 518 struct orangefs_bufmap_desc *to = &bufmap->desc_array[buffer_index];
4d1c4404 519 int i;
274dcf55
MM
520
521 gossip_debug(GOSSIP_BUFMAP_DEBUG,
34204fde 522 "%s: buffer_index:%d: size:%zu:\n",
4d1c4404 523 __func__, buffer_index, size);
274dcf55 524
274dcf55 525
4d1c4404 526 for (i = 0; size; i++) {
34204fde
AV
527 struct page *page = to->page_array[i];
528 size_t n = size;
529 if (n > PAGE_SIZE)
530 n = PAGE_SIZE;
531 n = copy_page_from_iter(page, 0, n, iter);
532 if (!n)
533 return -EFAULT;
534 size -= n;
274dcf55 535 }
34204fde 536 return 0;
274dcf55 537
274dcf55
MM
538}
539
540/*
b5e376ea
MM
541 * we've been handed an iovec, we need to fill it from
542 * the shared memory descriptor at "buffer_index".
274dcf55 543 */
8bb8aefd 544int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap,
4d1c4404 545 struct iov_iter *iter,
5c278228
AV
546 int buffer_index,
547 size_t size)
274dcf55 548{
8bb8aefd 549 struct orangefs_bufmap_desc *from = &bufmap->desc_array[buffer_index];
4d1c4404 550 int i;
274dcf55
MM
551
552 gossip_debug(GOSSIP_BUFMAP_DEBUG,
5c278228
AV
553 "%s: buffer_index:%d: size:%zu:\n",
554 __func__, buffer_index, size);
274dcf55 555
274dcf55 556
5c278228
AV
557 for (i = 0; size; i++) {
558 struct page *page = from->page_array[i];
559 size_t n = size;
560 if (n > PAGE_SIZE)
561 n = PAGE_SIZE;
562 n = copy_page_to_iter(page, 0, n, iter);
563 if (!n)
564 return -EFAULT;
565 size -= n;
274dcf55 566 }
5c278228 567 return 0;
274dcf55 568}