2 * (C) 2001 Clemson University and The University of Chicago
4 * See COPYING in top-level directory.
7 #include "orangefs-kernel.h"
8 #include "orangefs-bufmap.h"
/*
 * Wait queue woken by orangefs_bufmap_initialize() once a new bufmap has
 * been published in __orangefs_bufmap.
 */
10 DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq);
/*
 * State of the shared buffer map. Only part of the member list is visible
 * in this excerpt; other code in this file also uses refcnt, desc_size,
 * desc_shift, desc_count, total_size and page_count.
 */
12 static struct orangefs_bufmap {
/* pages filled in by get_user_pages_fast() in orangefs_bufmap_map() */
21 struct page **page_array;
/* one entry per descriptor; each entry points at its slice of page_array */
22 struct orangefs_bufmap_desc *desc_array;
24 /* array to track usage of buffer descriptors */
25 int *buffer_index_array;
/* passed as the slot lock for buffer_index_array by orangefs_bufmap_get/put */
26 spinlock_t buffer_index_lock;
28 /* array to track usage of buffer descriptors for readdir */
29 int readdir_index_array[ORANGEFS_READDIR_DEFAULT_DESC_COUNT];
/* passed as the slot lock for readdir_index_array by readdir_index_get/put */
30 spinlock_t readdir_index_lock;
/*
 * Held while __orangefs_bufmap is read or updated in orangefs_bufmap_ref(),
 * orangefs_bufmap_unref() and orangefs_bufmap_initialize().
 */
33 static DEFINE_SPINLOCK(orangefs_bufmap_lock);
/*
 * Call page_cache_release() on each of the bufmap->page_count entries of
 * bufmap->page_array.
 */
36 orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
40 for (i = 0; i < bufmap->page_count; i++)
41 page_cache_release(bufmap->page_array[i]);
/*
 * Free the three arrays allocated by orangefs_bufmap_alloc(): page_array,
 * desc_array and buffer_index_array.
 *
 * NOTE(review): freeing of the bufmap structure itself is not visible in
 * this excerpt -- confirm it happens here.
 */
45 orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
47 kfree(bufmap->page_array);
48 kfree(bufmap->desc_array);
49 kfree(bufmap->buffer_index_array);
/*
 * Take a reference on the currently installed bufmap, if any.
 *
 * Under orangefs_bufmap_lock, copies __orangefs_bufmap into the local and
 * increments its refcnt when it is non-NULL; the local stays NULL otherwise.
 * The return statement is not visible here; callers test the result for
 * NULL, so presumably the local is what is returned.
 */
53 struct orangefs_bufmap *orangefs_bufmap_ref(void)
55 struct orangefs_bufmap *bufmap = NULL;
57 spin_lock(&orangefs_bufmap_lock);
58 if (__orangefs_bufmap) {
59 bufmap = __orangefs_bufmap;
60 atomic_inc(&bufmap->refcnt);
62 spin_unlock(&orangefs_bufmap_lock);
/*
 * Drop one reference on bufmap.
 *
 * When atomic_dec_and_lock() reports success, the global
 * __orangefs_bufmap is cleared under orangefs_bufmap_lock, and the pages and
 * arrays are then released outside the lock.
 *
 * bufmap is dereferenced unconditionally (&bufmap->refcnt), so callers must
 * not pass NULL.
 */
66 void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap)
68 if (atomic_dec_and_lock(&bufmap->refcnt, &orangefs_bufmap_lock)) {
69 __orangefs_bufmap = NULL;
70 spin_unlock(&orangefs_bufmap_lock);
72 orangefs_bufmap_unmap(bufmap);
73 orangefs_bufmap_free(bufmap);
/*
 * Look up desc_size of the installed bufmap; size is 0 when no bufmap is
 * installed. The return statement is not visible here (presumably
 * "return size").
 *
 * NOTE(review): in the lines visible here the reference is dropped with
 * orangefs_bufmap_unref(bufmap) even when bufmap is NULL, and
 * orangefs_bufmap_unref() dereferences its argument. Confirm a NULL guard
 * exists; if not, this is a NULL pointer dereference whenever no bufmap is
 * installed.
 */
77 inline int orangefs_bufmap_size_query(void)
79 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
80 int size = bufmap ? bufmap->desc_size : 0;
82 orangefs_bufmap_unref(bufmap);
/*
 * Look up desc_shift of the installed bufmap; shift is 0 when no bufmap is
 * installed. The return statement is not visible here (presumably
 * "return shift").
 *
 * NOTE(review): in the lines visible here the reference is dropped with
 * orangefs_bufmap_unref(bufmap) even when bufmap is NULL, and
 * orangefs_bufmap_unref() dereferences its argument. Confirm a NULL guard
 * exists; if not, this is a NULL pointer dereference whenever no bufmap is
 * installed.
 */
86 inline int orangefs_bufmap_shift_query(void)
88 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
89 int shift = bufmap ? bufmap->desc_shift : 0;
91 orangefs_bufmap_unref(bufmap);
/* wait queue used for slots in buffer_index_array (orangefs_bufmap_get/put) */
95 static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* wait queue used for slots in readdir_index_array (readdir_index_get/put) */
96 static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
101 * Reports whether the shared memory system, including the
102 * buffer_index_array, is available, i.e. whether a bufmap is installed.
104 * returns 1 if __orangefs_bufmap is set, 0 otherwise
/*
 * Returns 1 when __orangefs_bufmap is non-NULL and 0 otherwise. The pointer
 * is read without taking orangefs_bufmap_lock, so the answer is a snapshot.
 */
106 int get_bufmap_init(void)
108 return __orangefs_bufmap ? 1 : 0;
/*
 * Allocate a bufmap and its bookkeeping arrays from the geometry in
 * user_desc (total_size, count, size). No user pages are touched here; that
 * is done by orangefs_bufmap_map().
 *
 * The new bufmap starts with refcnt 1. On an allocation failure the arrays
 * allocated so far are freed through the goto labels at the end. The return
 * statements are not visible in this excerpt.
 */
112 static struct orangefs_bufmap *
113 orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
115 struct orangefs_bufmap *bufmap;
117 bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
121 atomic_set(&bufmap->refcnt, 1);
122 bufmap->total_size = user_desc->total_size;
123 bufmap->desc_count = user_desc->count;
124 bufmap->desc_size = user_desc->size;
/*
 * NOTE(review): no visible check in orangefs_bufmap_initialize() rejects
 * size == 0 (0 % PAGE_SIZE == 0 passes), and ilog2(0) is not meaningful.
 * Confirm zero/negative sizes are rejected before this point.
 */
125 bufmap->desc_shift = ilog2(bufmap->desc_size);
127 spin_lock_init(&bufmap->buffer_index_lock);
/* one int per descriptor, zero-initialized by kcalloc() */
128 bufmap->buffer_index_array =
129 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
130 if (!bufmap->buffer_index_array) {
131 gossip_err("orangefs: could not allocate %d buffer indices\n",
133 goto out_free_bufmap;
135 spin_lock_init(&bufmap->readdir_index_lock);
138 kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
140 if (!bufmap->desc_array) {
141 gossip_err("orangefs: could not allocate %d descriptors\n",
143 goto out_free_index_array;
/* number of whole pages covered by the user buffer */
146 bufmap->page_count = bufmap->total_size / PAGE_SIZE;
148 /* allocate storage to track our page mappings */
150 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
151 if (!bufmap->page_array)
152 goto out_free_desc_array;
/* error unwind: free in reverse order of allocation */
157 kfree(bufmap->desc_array);
158 out_free_index_array:
159 kfree(bufmap->buffer_index_array);
/*
 * Fill bufmap->page_array with the user pages starting at user_desc->ptr
 * and split them into bufmap->desc_count descriptors of pages_per_desc
 * pages each.
 *
 * If get_user_pages_fast() hands back a count different from page_count,
 * the pages that were obtained are flagged with SetPageError() and released.
 * Return values are not visible in this excerpt.
 */
167 orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
168 struct ORANGEFS_dev_map_desc *user_desc)
170 int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
171 int offset = 0, ret, i;
/* third argument is 1 -- presumably the "write" flag; confirm against the
 * get_user_pages_fast() signature of the target kernel */
174 ret = get_user_pages_fast((unsigned long)user_desc->ptr,
175 bufmap->page_count, 1, bufmap->page_array);
180 if (ret != bufmap->page_count) {
181 gossip_err("orangefs error: asked for %d pages, only got %d.\n",
182 bufmap->page_count, ret);
/* NOTE(review): this loop assumes ret >= 0; a check for a negative ret is
 * not visible in this excerpt -- confirm one precedes this block */
184 for (i = 0; i < ret; i++) {
185 SetPageError(bufmap->page_array[i]);
186 page_cache_release(bufmap->page_array[i]);
192 * ideally we want to get kernel space pointers for each page, but
193 * we can't kmap that many pages at once if highmem is being used.
194 * so instead, we just kmap/kunmap the page address each time the
197 for (i = 0; i < bufmap->page_count; i++)
198 flush_dcache_page(bufmap->page_array[i]);
200 /* build a list of available descriptors */
201 for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
/* descriptor i covers page_array[offset .. offset + pages_per_desc - 1] */
202 bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
203 bufmap->desc_array[i].array_count = pages_per_desc;
/* matching user-space address of the first byte of descriptor i */
204 bufmap->desc_array[i].uaddr =
205 (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
206 offset += pages_per_desc;
213 * orangefs_bufmap_initialize()
215 * initializes the mapped buffer interface
217 * returns 0 on success, -errno on failure
/*
 * Validate the user-supplied buffer description, build a bufmap for it
 * (orangefs_bufmap_alloc() + orangefs_bufmap_map()) and publish it in
 * __orangefs_bufmap, unless one is already installed. On success, waiters
 * on orangefs_bufmap_init_waitq are woken.
 */
219 int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
221 struct orangefs_bufmap *bufmap;
224 gossip_debug(GOSSIP_BUFMAP_DEBUG,
225 "orangefs_bufmap_initialize: called (ptr ("
226 "%p) sz (%d) cnt(%d).\n",
232 * sanity check alignment and size of buffer that caller wants to
235 if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
236 (unsigned long)user_desc->ptr) {
237 gossip_err("orangefs error: memory alignment (front). %p\n",
/* the end of the buffer must be page aligned as well */
242 if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
243 != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
244 gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
246 user_desc->total_size);
/*
 * total_size must equal size * count.
 * NOTE(review): the fields are printed with %d, so presumably int; the
 * product can overflow, and no visible check rejects zero or negative
 * values -- confirm against the struct definition and callers.
 */
250 if (user_desc->total_size != (user_desc->size * user_desc->count)) {
251 gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
252 user_desc->total_size,
/* each descriptor must span a whole number of pages */
258 if ((user_desc->size % PAGE_SIZE) != 0) {
259 gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
265 bufmap = orangefs_bufmap_alloc(user_desc);
269 ret = orangefs_bufmap_map(bufmap, user_desc);
271 goto out_free_bufmap;
/* publish the new map; bail out if another one is already installed */
274 spin_lock(&orangefs_bufmap_lock);
275 if (__orangefs_bufmap) {
276 spin_unlock(&orangefs_bufmap_lock);
277 gossip_err("orangefs: error: bufmap already initialized.\n");
279 goto out_unmap_bufmap;
281 __orangefs_bufmap = bufmap;
282 spin_unlock(&orangefs_bufmap_lock);
285 * If there are operations in orangefs_bufmap_init_waitq, wake them up.
286 * This scenario occurs when the client-core is restarted and I/O
287 * requests in the in-progress or waiting tables are restarted. I/O
288 * requests cannot be restarted until the shared memory system is
289 * completely re-initialized, so we put the I/O requests in this
290 * waitq until initialization has completed. NOTE: the I/O requests
291 * are also on a timer, so they don't wait forever just in case the
292 * client-core doesn't come back up.
294 wake_up_interruptible(&orangefs_bufmap_init_waitq);
296 gossip_debug(GOSSIP_BUFMAP_DEBUG,
297 "orangefs_bufmap_initialize: exiting normally\n");
/* error unwind: release the pages, then free the bookkeeping arrays */
301 orangefs_bufmap_unmap(bufmap);
303 orangefs_bufmap_free(bufmap);
309 * orangefs_bufmap_finalize()
311 * shuts down the mapped buffer interface and releases any resources
/*
 * Drop a reference on the installed bufmap via orangefs_bufmap_unref();
 * the map is torn down there once the last reference is gone.
 *
 * BUG()s if no bufmap is installed. In the lines visible here,
 * __orangefs_bufmap is read without holding orangefs_bufmap_lock --
 * NOTE(review): confirm callers serialize finalize against initialize.
 */
316 void orangefs_bufmap_finalize(void)
318 gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
319 BUG_ON(!__orangefs_bufmap);
320 orangefs_bufmap_unref(__orangefs_bufmap);
321 gossip_debug(GOSSIP_BUFMAP_DEBUG,
322 "orangefs_bufmap_finalize: exiting normally\n");
/*
 * Members of struct slot_args (the struct header and the slot_count /
 * slot_array members used below are not visible in this excerpt).
 */
/* lock taken around every access to slot_array */
328 spinlock_t *slot_lock;
/* wait queue that sleepers in wait_for_a_slot() are queued on */
329 wait_queue_head_t *slot_wq;
/*
 * Claim a free entry in slargs->slot_array, sleeping until one is released
 * if none is available.
 *
 * Under slot_lock the caller is queued as an exclusive waiter and the array
 * is scanned for an entry equal to 0, which is then set to 1. If nothing was
 * claimed and no signal is pending, the task sleeps for up to
 * slot_timeout_secs seconds via schedule_timeout(). The wait entry is
 * removed with finish_wait() under slot_lock before leaving.
 *
 * The enclosing loop, the store to *buffer_index and the return values are
 * not visible in this excerpt.
 */
332 static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
336 DEFINE_WAIT(wait_entry);
340 * check for available desc, slot_lock is the appropriate
343 spin_lock(slargs->slot_lock);
344 prepare_to_wait_exclusive(slargs->slot_wq,
347 for (i = 0; i < slargs->slot_count; i++)
348 if (slargs->slot_array[i] == 0) {
349 slargs->slot_array[i] = 1;
354 spin_unlock(slargs->slot_lock);
356 /* if we acquired a buffer, then break out of while */
360 if (!signal_pending(current)) {
362 MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
363 gossip_debug(GOSSIP_BUFMAP_DEBUG,
364 "[BUFMAP]: waiting %d "
365 "seconds for a slot\n",
/* schedule_timeout() returning 0 is treated as a timeout */
367 if (!schedule_timeout(timeout)) {
368 gossip_debug(GOSSIP_BUFMAP_DEBUG,
369 "*** wait_for_a_slot timed out\n");
373 gossip_debug(GOSSIP_BUFMAP_DEBUG,
374 "[BUFMAP]: woken up by a slot becoming available.\n");
378 gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs: %s interrupted.\n",
384 spin_lock(slargs->slot_lock);
385 finish_wait(slargs->slot_wq, &wait_entry);
386 spin_unlock(slargs->slot_lock);
/*
 * Mark slot buffer_index in slargs->slot_array as free (0) and wake
 * sleepers on slargs->slot_wq.
 *
 * An index outside [0, slot_count) only releases the lock; what follows on
 * that path is not visible here (presumably an early return).
 */
390 static void put_back_slot(struct slot_args *slargs, int buffer_index)
392 /* slot_lock is the appropriate index_lock */
393 spin_lock(slargs->slot_lock);
394 if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
395 spin_unlock(slargs->slot_lock);
399 /* put the desc back on the queue */
400 slargs->slot_array[buffer_index] = 0;
401 spin_unlock(slargs->slot_lock);
403 /* wake up anyone who may be sleeping on the queue */
404 wake_up_interruptible(slargs->slot_wq);
408 * orangefs_bufmap_get()
410 * gets a free mapped buffer descriptor, will sleep until one becomes
411 * available if necessary
413 * returns 0 on success, -errno on failure
/*
 * Take a reference on the installed bufmap and claim one entry of its
 * buffer_index_array through wait_for_a_slot(), sleeping on bufmap_waitq.
 *
 * The conditions around the error message and around the unref below are
 * not visible in this excerpt. orangefs_bufmap_put() drops a reference, so
 * presumably the unref here runs only on failure and the reference is
 * handed to the caller through *mapp on success -- TODO confirm.
 */
415 int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index)
417 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
418 struct slot_args slargs;
422 gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n");
426 slargs.slot_count = bufmap->desc_count;
427 slargs.slot_array = bufmap->buffer_index_array;
428 slargs.slot_lock = &bufmap->buffer_index_lock;
429 slargs.slot_wq = &bufmap_waitq;
430 ret = wait_for_a_slot(&slargs, buffer_index);
432 orangefs_bufmap_unref(bufmap);
438 * orangefs_bufmap_put()
440 * returns a mapped buffer descriptor to the collection
/*
 * Release slot buffer_index of bufmap->buffer_index_array, wake waiters on
 * bufmap_waitq (both via put_back_slot()), then drop one reference on
 * bufmap.
 */
444 void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index)
446 struct slot_args slargs;
448 slargs.slot_count = bufmap->desc_count;
449 slargs.slot_array = bufmap->buffer_index_array;
450 slargs.slot_lock = &bufmap->buffer_index_lock;
451 slargs.slot_wq = &bufmap_waitq;
452 put_back_slot(&slargs, buffer_index);
453 orangefs_bufmap_unref(bufmap);
457 * readdir_index_get()
459 * gets a free descriptor, will sleep until one becomes
460 * available if necessary.
461 * Although the readdir buffers are not mapped into kernel space
462 * we could do that at a later point in time. Regardless, these
463 * indices are used by the client-core.
465 * returns 0 on success, -errno on failure
/*
 * Take a reference on the installed bufmap and claim one of its
 * ORANGEFS_READDIR_DEFAULT_DESC_COUNT readdir slots through
 * wait_for_a_slot(), sleeping on readdir_waitq.
 *
 * The conditions around the error message and around the unref below are
 * not visible in this excerpt. readdir_index_put() drops a reference, so
 * presumably the unref here runs only on failure -- TODO confirm.
 */
467 int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index)
469 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
470 struct slot_args slargs;
474 gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n");
478 slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT;
479 slargs.slot_array = bufmap->readdir_index_array;
480 slargs.slot_lock = &bufmap->readdir_index_lock;
481 slargs.slot_wq = &readdir_waitq;
482 ret = wait_for_a_slot(&slargs, buffer_index);
484 orangefs_bufmap_unref(bufmap);
/*
 * Release slot buffer_index of bufmap->readdir_index_array, wake waiters on
 * readdir_waitq (both via put_back_slot()), then drop one reference on
 * bufmap.
 */
489 void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index)
491 struct slot_args slargs;
493 slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT;
494 slargs.slot_array = bufmap->readdir_index_array;
495 slargs.slot_lock = &bufmap->readdir_index_lock;
496 slargs.slot_wq = &readdir_waitq;
497 put_back_slot(&slargs, buffer_index);
498 orangefs_bufmap_unref(bufmap);
502 * we've been handed an iovec, we need to copy it to
503 * the shared memory descriptor at "buffer_index".
/*
 * Copy data from iter into the pages of descriptor buffer_index, one page
 * per loop iteration, starting at offset 0 of each page, until size reaches
 * zero. How n is chosen and how size is decremented is not visible in this
 * excerpt.
 *
 * NOTE(review): buffer_index indexes desc_array with no bounds check visible
 * here, and the loop index i is not visibly limited to the descriptor's
 * array_count -- confirm callers guarantee a valid index and
 * size <= desc_size.
 */
505 int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap,
506 struct iov_iter *iter,
510 struct orangefs_bufmap_desc *to = &bufmap->desc_array[buffer_index];
513 gossip_debug(GOSSIP_BUFMAP_DEBUG,
514 "%s: buffer_index:%d: size:%zu:\n",
515 __func__, buffer_index, size);
518 for (i = 0; size; i++) {
519 struct page *page = to->page_array[i];
523 n = copy_page_from_iter(page, 0, n, iter);
533 * we've been handed an iovec, we need to fill it from
534 * the shared memory descriptor at "buffer_index".
/*
 * Copy data from the pages of descriptor buffer_index into iter, one page
 * per loop iteration, starting at offset 0 of each page, until size reaches
 * zero. How n is chosen and how size is decremented is not visible in this
 * excerpt.
 *
 * NOTE(review): buffer_index indexes desc_array with no bounds check visible
 * here, and the loop index i is not visibly limited to the descriptor's
 * array_count -- confirm callers guarantee a valid index and
 * size <= desc_size.
 */
536 int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap,
537 struct iov_iter *iter,
541 struct orangefs_bufmap_desc *from = &bufmap->desc_array[buffer_index];
544 gossip_debug(GOSSIP_BUFMAP_DEBUG,
545 "%s: buffer_index:%d: size:%zu:\n",
546 __func__, buffer_index, size);
549 for (i = 0; size; i++) {
550 struct page *page = from->page_array[i];
554 n = copy_page_to_iter(page, 0, n, iter);