Orangefs: don't keep checking stuff in on Friday afternoon.
[linux-2.6-block.git] / fs / orangefs / orangefs-bufmap.c
CommitLineData
274dcf55
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6#include "protocol.h"
575e9461
MM
7#include "orangefs-kernel.h"
8#include "orangefs-bufmap.h"
274dcf55 9
8bb8aefd 10DECLARE_WAIT_QUEUE_HEAD(orangefs_bufmap_init_waitq);
274dcf55 11
8bb8aefd 12static struct orangefs_bufmap {
274dcf55
MM
13 atomic_t refcnt;
14
15 int desc_size;
16 int desc_shift;
17 int desc_count;
18 int total_size;
19 int page_count;
20
21 struct page **page_array;
8bb8aefd 22 struct orangefs_bufmap_desc *desc_array;
274dcf55
MM
23
24 /* array to track usage of buffer descriptors */
25 int *buffer_index_array;
26 spinlock_t buffer_index_lock;
27
28 /* array to track usage of buffer descriptors for readdir */
8bb8aefd 29 int readdir_index_array[ORANGEFS_READDIR_DEFAULT_DESC_COUNT];
274dcf55 30 spinlock_t readdir_index_lock;
8bb8aefd 31} *__orangefs_bufmap;
274dcf55 32
8bb8aefd 33static DEFINE_SPINLOCK(orangefs_bufmap_lock);
274dcf55
MM
34
35static void
8bb8aefd 36orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
274dcf55
MM
37{
38 int i;
39
40 for (i = 0; i < bufmap->page_count; i++)
41 page_cache_release(bufmap->page_array[i]);
42}
43
44static void
8bb8aefd 45orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
274dcf55
MM
46{
47 kfree(bufmap->page_array);
48 kfree(bufmap->desc_array);
49 kfree(bufmap->buffer_index_array);
50 kfree(bufmap);
51}
52
8bb8aefd 53struct orangefs_bufmap *orangefs_bufmap_ref(void)
274dcf55 54{
8bb8aefd 55 struct orangefs_bufmap *bufmap = NULL;
274dcf55 56
8bb8aefd
YL
57 spin_lock(&orangefs_bufmap_lock);
58 if (__orangefs_bufmap) {
59 bufmap = __orangefs_bufmap;
274dcf55
MM
60 atomic_inc(&bufmap->refcnt);
61 }
8bb8aefd 62 spin_unlock(&orangefs_bufmap_lock);
274dcf55
MM
63 return bufmap;
64}
65
8bb8aefd 66void orangefs_bufmap_unref(struct orangefs_bufmap *bufmap)
274dcf55 67{
8bb8aefd
YL
68 if (atomic_dec_and_lock(&bufmap->refcnt, &orangefs_bufmap_lock)) {
69 __orangefs_bufmap = NULL;
70 spin_unlock(&orangefs_bufmap_lock);
274dcf55 71
8bb8aefd
YL
72 orangefs_bufmap_unmap(bufmap);
73 orangefs_bufmap_free(bufmap);
274dcf55
MM
74 }
75}
76
8bb8aefd 77inline int orangefs_bufmap_size_query(void)
274dcf55 78{
8bb8aefd 79 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
80 int size = bufmap ? bufmap->desc_size : 0;
81
8bb8aefd 82 orangefs_bufmap_unref(bufmap);
274dcf55
MM
83 return size;
84}
85
8bb8aefd 86inline int orangefs_bufmap_shift_query(void)
274dcf55 87{
8bb8aefd 88 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
89 int shift = bufmap ? bufmap->desc_shift : 0;
90
8bb8aefd 91 orangefs_bufmap_unref(bufmap);
274dcf55
MM
92 return shift;
93}
94
/* tasks waiting for a free I/O descriptor (orangefs_bufmap_get/put) */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* tasks waiting for a free readdir slot (readdir_index_get/put) */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
97
98/*
99 * get_bufmap_init
100 *
101 * If bufmap_init is 1, then the shared memory system, including the
102 * buffer_index_array, is available. Otherwise, it is not.
103 *
104 * returns the value of bufmap_init
105 */
106int get_bufmap_init(void)
107{
8bb8aefd 108 return __orangefs_bufmap ? 1 : 0;
274dcf55
MM
109}
110
111
8bb8aefd
YL
112static struct orangefs_bufmap *
113orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
274dcf55 114{
8bb8aefd 115 struct orangefs_bufmap *bufmap;
274dcf55
MM
116
117 bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
118 if (!bufmap)
119 goto out;
120
121 atomic_set(&bufmap->refcnt, 1);
122 bufmap->total_size = user_desc->total_size;
123 bufmap->desc_count = user_desc->count;
124 bufmap->desc_size = user_desc->size;
125 bufmap->desc_shift = ilog2(bufmap->desc_size);
126
127 spin_lock_init(&bufmap->buffer_index_lock);
128 bufmap->buffer_index_array =
129 kcalloc(bufmap->desc_count, sizeof(int), GFP_KERNEL);
130 if (!bufmap->buffer_index_array) {
8bb8aefd 131 gossip_err("orangefs: could not allocate %d buffer indices\n",
274dcf55
MM
132 bufmap->desc_count);
133 goto out_free_bufmap;
134 }
135 spin_lock_init(&bufmap->readdir_index_lock);
136
137 bufmap->desc_array =
8bb8aefd 138 kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
274dcf55
MM
139 GFP_KERNEL);
140 if (!bufmap->desc_array) {
8bb8aefd 141 gossip_err("orangefs: could not allocate %d descriptors\n",
274dcf55
MM
142 bufmap->desc_count);
143 goto out_free_index_array;
144 }
145
146 bufmap->page_count = bufmap->total_size / PAGE_SIZE;
147
148 /* allocate storage to track our page mappings */
149 bufmap->page_array =
150 kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
151 if (!bufmap->page_array)
152 goto out_free_desc_array;
153
154 return bufmap;
155
156out_free_desc_array:
157 kfree(bufmap->desc_array);
158out_free_index_array:
159 kfree(bufmap->buffer_index_array);
160out_free_bufmap:
161 kfree(bufmap);
162out:
163 return NULL;
164}
165
166static int
8bb8aefd
YL
167orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
168 struct ORANGEFS_dev_map_desc *user_desc)
274dcf55
MM
169{
170 int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
171 int offset = 0, ret, i;
172
173 /* map the pages */
16742f2d
AV
174 ret = get_user_pages_fast((unsigned long)user_desc->ptr,
175 bufmap->page_count, 1, bufmap->page_array);
274dcf55
MM
176
177 if (ret < 0)
178 return ret;
179
180 if (ret != bufmap->page_count) {
8bb8aefd 181 gossip_err("orangefs error: asked for %d pages, only got %d.\n",
274dcf55
MM
182 bufmap->page_count, ret);
183
184 for (i = 0; i < ret; i++) {
185 SetPageError(bufmap->page_array[i]);
186 page_cache_release(bufmap->page_array[i]);
187 }
188 return -ENOMEM;
189 }
190
191 /*
192 * ideally we want to get kernel space pointers for each page, but
193 * we can't kmap that many pages at once if highmem is being used.
194 * so instead, we just kmap/kunmap the page address each time the
195 * kaddr is needed.
196 */
197 for (i = 0; i < bufmap->page_count; i++)
198 flush_dcache_page(bufmap->page_array[i]);
199
200 /* build a list of available descriptors */
201 for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
202 bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
203 bufmap->desc_array[i].array_count = pages_per_desc;
204 bufmap->desc_array[i].uaddr =
205 (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
206 offset += pages_per_desc;
207 }
208
209 return 0;
210}
211
212/*
8bb8aefd 213 * orangefs_bufmap_initialize()
274dcf55
MM
214 *
215 * initializes the mapped buffer interface
216 *
217 * returns 0 on success, -errno on failure
218 */
8bb8aefd 219int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
274dcf55 220{
8bb8aefd 221 struct orangefs_bufmap *bufmap;
274dcf55
MM
222 int ret = -EINVAL;
223
224 gossip_debug(GOSSIP_BUFMAP_DEBUG,
8bb8aefd 225 "orangefs_bufmap_initialize: called (ptr ("
274dcf55
MM
226 "%p) sz (%d) cnt(%d).\n",
227 user_desc->ptr,
228 user_desc->size,
229 user_desc->count);
230
231 /*
232 * sanity check alignment and size of buffer that caller wants to
233 * work with
234 */
235 if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
236 (unsigned long)user_desc->ptr) {
8bb8aefd 237 gossip_err("orangefs error: memory alignment (front). %p\n",
274dcf55
MM
238 user_desc->ptr);
239 goto out;
240 }
241
242 if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
243 != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
8bb8aefd 244 gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
274dcf55
MM
245 user_desc->ptr,
246 user_desc->total_size);
247 goto out;
248 }
249
250 if (user_desc->total_size != (user_desc->size * user_desc->count)) {
8bb8aefd 251 gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
274dcf55
MM
252 user_desc->total_size,
253 user_desc->size,
254 user_desc->count);
255 goto out;
256 }
257
258 if ((user_desc->size % PAGE_SIZE) != 0) {
8bb8aefd 259 gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
274dcf55
MM
260 user_desc->size);
261 goto out;
262 }
263
264 ret = -ENOMEM;
8bb8aefd 265 bufmap = orangefs_bufmap_alloc(user_desc);
274dcf55
MM
266 if (!bufmap)
267 goto out;
268
8bb8aefd 269 ret = orangefs_bufmap_map(bufmap, user_desc);
274dcf55
MM
270 if (ret)
271 goto out_free_bufmap;
272
273
8bb8aefd
YL
274 spin_lock(&orangefs_bufmap_lock);
275 if (__orangefs_bufmap) {
276 spin_unlock(&orangefs_bufmap_lock);
277 gossip_err("orangefs: error: bufmap already initialized.\n");
274dcf55
MM
278 ret = -EALREADY;
279 goto out_unmap_bufmap;
280 }
8bb8aefd
YL
281 __orangefs_bufmap = bufmap;
282 spin_unlock(&orangefs_bufmap_lock);
274dcf55
MM
283
284 /*
8bb8aefd 285 * If there are operations in orangefs_bufmap_init_waitq, wake them up.
274dcf55
MM
286 * This scenario occurs when the client-core is restarted and I/O
287 * requests in the in-progress or waiting tables are restarted. I/O
288 * requests cannot be restarted until the shared memory system is
289 * completely re-initialized, so we put the I/O requests in this
290 * waitq until initialization has completed. NOTE: the I/O requests
291 * are also on a timer, so they don't wait forever just in case the
292 * client-core doesn't come back up.
293 */
8bb8aefd 294 wake_up_interruptible(&orangefs_bufmap_init_waitq);
274dcf55
MM
295
296 gossip_debug(GOSSIP_BUFMAP_DEBUG,
8bb8aefd 297 "orangefs_bufmap_initialize: exiting normally\n");
274dcf55
MM
298 return 0;
299
300out_unmap_bufmap:
8bb8aefd 301 orangefs_bufmap_unmap(bufmap);
274dcf55 302out_free_bufmap:
8bb8aefd 303 orangefs_bufmap_free(bufmap);
274dcf55
MM
304out:
305 return ret;
306}
307
308/*
8bb8aefd 309 * orangefs_bufmap_finalize()
274dcf55
MM
310 *
311 * shuts down the mapped buffer interface and releases any resources
312 * associated with it
313 *
314 * no return value
315 */
8bb8aefd 316void orangefs_bufmap_finalize(void)
274dcf55 317{
8bb8aefd
YL
318 gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
319 BUG_ON(!__orangefs_bufmap);
320 orangefs_bufmap_unref(__orangefs_bufmap);
274dcf55 321 gossip_debug(GOSSIP_BUFMAP_DEBUG,
8bb8aefd 322 "orangefs_bufmap_finalize: exiting normally\n");
274dcf55
MM
323}
324
325struct slot_args {
326 int slot_count;
327 int *slot_array;
328 spinlock_t *slot_lock;
329 wait_queue_head_t *slot_wq;
330};
331
332static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
333{
334 int ret = -1;
335 int i = 0;
336 DECLARE_WAITQUEUE(my_wait, current);
337
338
339 add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
340
341 while (1) {
342 set_current_state(TASK_INTERRUPTIBLE);
343
344 /*
345 * check for available desc, slot_lock is the appropriate
346 * index_lock
347 */
348 spin_lock(slargs->slot_lock);
349 for (i = 0; i < slargs->slot_count; i++)
350 if (slargs->slot_array[i] == 0) {
351 slargs->slot_array[i] = 1;
352 *buffer_index = i;
353 ret = 0;
354 break;
355 }
356 spin_unlock(slargs->slot_lock);
357
358 /* if we acquired a buffer, then break out of while */
359 if (ret == 0)
360 break;
361
362 if (!signal_pending(current)) {
363 int timeout =
364 MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
365 gossip_debug(GOSSIP_BUFMAP_DEBUG,
366 "[BUFMAP]: waiting %d "
367 "seconds for a slot\n",
368 slot_timeout_secs);
369 if (!schedule_timeout(timeout)) {
370 gossip_debug(GOSSIP_BUFMAP_DEBUG,
371 "*** wait_for_a_slot timed out\n");
372 ret = -ETIMEDOUT;
373 break;
374 }
375 gossip_debug(GOSSIP_BUFMAP_DEBUG,
376 "[BUFMAP]: woken up by a slot becoming available.\n");
377 continue;
378 }
379
8bb8aefd 380 gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs: %s interrupted.\n",
274dcf55
MM
381 __func__);
382 ret = -EINTR;
383 break;
384 }
385
386 set_current_state(TASK_RUNNING);
387 remove_wait_queue(slargs->slot_wq, &my_wait);
388 return ret;
389}
390
391static void put_back_slot(struct slot_args *slargs, int buffer_index)
392{
393 /* slot_lock is the appropriate index_lock */
394 spin_lock(slargs->slot_lock);
395 if (buffer_index < 0 || buffer_index >= slargs->slot_count) {
396 spin_unlock(slargs->slot_lock);
397 return;
398 }
399
400 /* put the desc back on the queue */
401 slargs->slot_array[buffer_index] = 0;
402 spin_unlock(slargs->slot_lock);
403
404 /* wake up anyone who may be sleeping on the queue */
405 wake_up_interruptible(slargs->slot_wq);
406}
407
408/*
8bb8aefd 409 * orangefs_bufmap_get()
274dcf55
MM
410 *
411 * gets a free mapped buffer descriptor, will sleep until one becomes
412 * available if necessary
413 *
414 * returns 0 on success, -errno on failure
415 */
8bb8aefd 416int orangefs_bufmap_get(struct orangefs_bufmap **mapp, int *buffer_index)
274dcf55 417{
8bb8aefd 418 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
419 struct slot_args slargs;
420 int ret;
421
422 if (!bufmap) {
8bb8aefd 423 gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n");
274dcf55
MM
424 return -EIO;
425 }
426
427 slargs.slot_count = bufmap->desc_count;
428 slargs.slot_array = bufmap->buffer_index_array;
429 slargs.slot_lock = &bufmap->buffer_index_lock;
430 slargs.slot_wq = &bufmap_waitq;
431 ret = wait_for_a_slot(&slargs, buffer_index);
432 if (ret)
8bb8aefd 433 orangefs_bufmap_unref(bufmap);
274dcf55
MM
434 *mapp = bufmap;
435 return ret;
436}
437
438/*
8bb8aefd 439 * orangefs_bufmap_put()
274dcf55
MM
440 *
441 * returns a mapped buffer descriptor to the collection
442 *
443 * no return value
444 */
8bb8aefd 445void orangefs_bufmap_put(struct orangefs_bufmap *bufmap, int buffer_index)
274dcf55
MM
446{
447 struct slot_args slargs;
448
449 slargs.slot_count = bufmap->desc_count;
450 slargs.slot_array = bufmap->buffer_index_array;
451 slargs.slot_lock = &bufmap->buffer_index_lock;
452 slargs.slot_wq = &bufmap_waitq;
453 put_back_slot(&slargs, buffer_index);
8bb8aefd 454 orangefs_bufmap_unref(bufmap);
274dcf55
MM
455}
456
457/*
458 * readdir_index_get()
459 *
460 * gets a free descriptor, will sleep until one becomes
461 * available if necessary.
462 * Although the readdir buffers are not mapped into kernel space
463 * we could do that at a later point of time. Regardless, these
464 * indices are used by the client-core.
465 *
466 * returns 0 on success, -errno on failure
467 */
8bb8aefd 468int readdir_index_get(struct orangefs_bufmap **mapp, int *buffer_index)
274dcf55 469{
8bb8aefd 470 struct orangefs_bufmap *bufmap = orangefs_bufmap_ref();
274dcf55
MM
471 struct slot_args slargs;
472 int ret;
473
474 if (!bufmap) {
8bb8aefd 475 gossip_err("orangefs: please confirm that pvfs2-client daemon is running.\n");
274dcf55
MM
476 return -EIO;
477 }
478
8bb8aefd 479 slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT;
274dcf55
MM
480 slargs.slot_array = bufmap->readdir_index_array;
481 slargs.slot_lock = &bufmap->readdir_index_lock;
482 slargs.slot_wq = &readdir_waitq;
483 ret = wait_for_a_slot(&slargs, buffer_index);
484 if (ret)
8bb8aefd 485 orangefs_bufmap_unref(bufmap);
274dcf55
MM
486 *mapp = bufmap;
487 return ret;
488}
489
8bb8aefd 490void readdir_index_put(struct orangefs_bufmap *bufmap, int buffer_index)
274dcf55
MM
491{
492 struct slot_args slargs;
493
8bb8aefd 494 slargs.slot_count = ORANGEFS_READDIR_DEFAULT_DESC_COUNT;
274dcf55
MM
495 slargs.slot_array = bufmap->readdir_index_array;
496 slargs.slot_lock = &bufmap->readdir_index_lock;
497 slargs.slot_wq = &readdir_waitq;
498 put_back_slot(&slargs, buffer_index);
8bb8aefd 499 orangefs_bufmap_unref(bufmap);
274dcf55
MM
500}
501
8bb8aefd 502int orangefs_bufmap_copy_from_iovec(struct orangefs_bufmap *bufmap,
54804949
MM
503 struct iov_iter *iter,
504 int buffer_index,
505 size_t size)
274dcf55 506{
8bb8aefd 507 struct orangefs_bufmap_desc *to = &bufmap->desc_array[buffer_index];
4d1c4404 508 int i;
274dcf55
MM
509
510 gossip_debug(GOSSIP_BUFMAP_DEBUG,
34204fde 511 "%s: buffer_index:%d: size:%zu:\n",
4d1c4404 512 __func__, buffer_index, size);
274dcf55 513
274dcf55 514
4d1c4404 515 for (i = 0; size; i++) {
34204fde
AV
516 struct page *page = to->page_array[i];
517 size_t n = size;
518 if (n > PAGE_SIZE)
519 n = PAGE_SIZE;
520 n = copy_page_from_iter(page, 0, n, iter);
521 if (!n)
522 return -EFAULT;
523 size -= n;
274dcf55 524 }
34204fde 525 return 0;
274dcf55 526
274dcf55
MM
527}
528
529/*
4d1c4404
MM
530 * Iterate through the array of pages containing the bytes from
531 * a file being read.
274dcf55 532 *
274dcf55 533 */
8bb8aefd 534int orangefs_bufmap_copy_to_iovec(struct orangefs_bufmap *bufmap,
4d1c4404 535 struct iov_iter *iter,
5c278228
AV
536 int buffer_index,
537 size_t size)
274dcf55 538{
8bb8aefd 539 struct orangefs_bufmap_desc *from = &bufmap->desc_array[buffer_index];
4d1c4404 540 int i;
274dcf55
MM
541
542 gossip_debug(GOSSIP_BUFMAP_DEBUG,
5c278228
AV
543 "%s: buffer_index:%d: size:%zu:\n",
544 __func__, buffer_index, size);
274dcf55 545
274dcf55 546
5c278228
AV
547 for (i = 0; size; i++) {
548 struct page *page = from->page_array[i];
549 size_t n = size;
550 if (n > PAGE_SIZE)
551 n = PAGE_SIZE;
552 n = copy_page_to_iter(page, 0, n, iter);
553 if (!n)
554 return -EFAULT;
555 size -= n;
274dcf55 556 }
5c278228 557 return 0;
274dcf55 558}