Orangefs: do not finalize bufmap if it was never initialized.
[linux-2.6-block.git] / fs / orangefs / devorangefs-req.c
CommitLineData
5db11c21
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * Changes by Acxiom Corporation to add protocol version to kernel
5 * communication, Copyright Acxiom Corporation, 2005.
6 *
7 * See COPYING in top-level directory.
8 */
9
10#include "protocol.h"
575e9461
MM
11#include "orangefs-kernel.h"
12#include "orangefs-dev-proto.h"
13#include "orangefs-bufmap.h"
5db11c21
MM
14
15#include <linux/debugfs.h>
16#include <linux/slab.h>
17
18/* this file implements the /dev/pvfs2-req device node */
19
20static int open_access_count;
21
/*
 * Emit the multi-line "device opened more than once" diagnostic through
 * gossip_err(), including the current value of open_access_count.
 * Wrapped in do { } while (0) so it can be used as a single statement.
 */
#define DUMP_DEVICE_ERROR() \
do { \
	gossip_err("*****************************************************\n"); \
	gossip_err("ORANGEFS Device Error: You cannot open the device file "); \
	gossip_err("\n/dev/%s more than once. Please make sure that\nthere are no ", \
		   ORANGEFS_REQDEVICE_NAME); \
	gossip_err("instances of a program using this device\ncurrently running. (You must verify this!)\n"); \
	gossip_err("For example, you can use the lsof program as follows:\n"); \
	gossip_err("'lsof | grep %s' (run this as root)\n", \
		   ORANGEFS_REQDEVICE_NAME); \
	gossip_err(" open_access_count = %d\n", open_access_count); \
	gossip_err("*****************************************************\n"); \
} while (0)
36
37static int hash_func(__u64 tag, int table_size)
38{
2c590d5f 39 return do_div(tag, (unsigned int)table_size);
5db11c21
MM
40}
41
8bb8aefd 42static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
5db11c21
MM
43{
44 int index = hash_func(op->tag, hash_table_size);
45
46 spin_lock(&htable_ops_in_progress_lock);
47 list_add_tail(&op->list, &htable_ops_in_progress[index]);
48 spin_unlock(&htable_ops_in_progress_lock);
49}
50
8bb8aefd 51static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
5db11c21 52{
8bb8aefd 53 struct orangefs_kernel_op_s *op, *next;
5db11c21
MM
54 int index;
55
56 index = hash_func(tag, hash_table_size);
57
58 spin_lock(&htable_ops_in_progress_lock);
59 list_for_each_entry_safe(op,
60 next,
61 &htable_ops_in_progress[index],
62 list) {
63 if (op->tag == tag) {
64 list_del(&op->list);
65 spin_unlock(&htable_ops_in_progress_lock);
66 return op;
67 }
68 }
69
70 spin_unlock(&htable_ops_in_progress_lock);
71 return NULL;
72}
73
8bb8aefd 74static int orangefs_devreq_open(struct inode *inode, struct file *file)
5db11c21
MM
75{
76 int ret = -EINVAL;
77
78 if (!(file->f_flags & O_NONBLOCK)) {
97f10027
MM
79 gossip_err("%s: device cannot be opened in blocking mode\n",
80 __func__);
5db11c21
MM
81 goto out;
82 }
83 ret = -EACCES;
97f10027 84 gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
5db11c21
MM
85 mutex_lock(&devreq_mutex);
86
87 if (open_access_count == 0) {
88 ret = generic_file_open(inode, file);
89 if (ret == 0)
90 open_access_count++;
91 } else {
92 DUMP_DEVICE_ERROR();
93 }
94 mutex_unlock(&devreq_mutex);
95
96out:
97
98 gossip_debug(GOSSIP_DEV_DEBUG,
99 "pvfs2-client-core: open device complete (ret = %d)\n",
100 ret);
101 return ret;
102}
103
97f10027 104/* Function for read() callers into the device */
8bb8aefd 105static ssize_t orangefs_devreq_read(struct file *file,
5db11c21
MM
106 char __user *buf,
107 size_t count, loff_t *offset)
108{
8bb8aefd
YL
109 struct orangefs_kernel_op_s *op, *temp;
110 __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
111 static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
112 struct orangefs_kernel_op_s *cur_op = NULL;
24c8d080 113 unsigned long ret;
5db11c21 114
24c8d080 115 /* We do not support blocking IO. */
5db11c21 116 if (!(file->f_flags & O_NONBLOCK)) {
97f10027
MM
117 gossip_err("%s: blocking read from client-core.\n",
118 __func__);
5db11c21 119 return -EINVAL;
24c8d080
MB
120 }
121
122 /*
123 * The client will do an ioctl to find MAX_ALIGNED_DEV_REQ_UPSIZE, then
124 * always read with that size buffer.
125 */
126 if (count != MAX_ALIGNED_DEV_REQ_UPSIZE) {
127 gossip_err("orangefs: client-core tried to read wrong size\n");
128 return -EINVAL;
129 }
130
131 /* Get next op (if any) from top of list. */
8bb8aefd
YL
132 spin_lock(&orangefs_request_list_lock);
133 list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
24c8d080
MB
134 __s32 fsid;
135 /* This lock is held past the end of the loop when we break. */
136 spin_lock(&op->lock);
137
138 fsid = fsid_of_op(op);
8bb8aefd 139 if (fsid != ORANGEFS_FS_ID_NULL) {
24c8d080
MB
140 int ret;
141 /* Skip ops whose filesystem needs to be mounted. */
142 ret = fs_mount_pending(fsid);
143 if (ret == 1) {
5db11c21 144 gossip_debug(GOSSIP_DEV_DEBUG,
24c8d080
MB
145 "orangefs: skipping op tag %llu %s\n",
146 llu(op->tag), get_opname_string(op));
147 spin_unlock(&op->lock);
148 continue;
97f10027
MM
149 /*
150 * Skip ops whose filesystem we don't know about unless
151 * it is being mounted.
152 */
24c8d080
MB
153 /* XXX: is there a better way to detect this? */
154 } else if (ret == -1 &&
97f10027
MM
155 !(op->upcall.type ==
156 ORANGEFS_VFS_OP_FS_MOUNT ||
157 op->upcall.type ==
158 ORANGEFS_VFS_OP_GETATTR)) {
24c8d080
MB
159 gossip_debug(GOSSIP_DEV_DEBUG,
160 "orangefs: skipping op tag %llu %s\n",
161 llu(op->tag), get_opname_string(op));
162 gossip_err(
163 "orangefs: ERROR: fs_mount_pending %d\n",
164 fsid);
165 spin_unlock(&op->lock);
5db11c21 166 continue;
5db11c21
MM
167 }
168 }
24c8d080
MB
169 /*
170 * Either this op does not pertain to a filesystem, is mounting
171 * a filesystem, or pertains to a mounted filesystem. Let it
172 * through.
173 */
174 cur_op = op;
175 break;
176 }
177
178 /*
179 * At this point we either have a valid op and can continue or have not
180 * found an op and must ask the client to try again later.
181 */
182 if (!cur_op) {
8bb8aefd 183 spin_unlock(&orangefs_request_list_lock);
24c8d080 184 return -EAGAIN;
5db11c21
MM
185 }
186
24c8d080
MB
187 gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n",
188 llu(cur_op->tag), get_opname_string(cur_op));
5db11c21 189
24c8d080
MB
190 /*
191 * Such an op should never be on the list in the first place. If so, we
192 * will abort.
193 */
194 if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
195 gossip_err("orangefs: ERROR: Current op already queued.\n");
196 list_del(&cur_op->list);
5db11c21 197 spin_unlock(&cur_op->lock);
8bb8aefd 198 spin_unlock(&orangefs_request_list_lock);
24c8d080 199 return -EAGAIN;
5db11c21 200 }
24c8d080
MB
201
202 /*
203 * Set the operation to be in progress and move it between lists since
204 * it has been sent to the client.
205 */
206 set_op_state_inprogress(cur_op);
207
208 list_del(&cur_op->list);
8bb8aefd
YL
209 spin_unlock(&orangefs_request_list_lock);
210 orangefs_devreq_add_op(cur_op);
24c8d080
MB
211 spin_unlock(&cur_op->lock);
212
213 /* Push the upcall out. */
214 ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
215 if (ret != 0)
216 goto error;
217 ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32));
218 if (ret != 0)
219 goto error;
220 ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
221 if (ret != 0)
222 goto error;
223 ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall,
8bb8aefd 224 sizeof(struct orangefs_upcall_s));
24c8d080
MB
225 if (ret != 0)
226 goto error;
227
228 /* The client only asks to read one size buffer. */
229 return MAX_ALIGNED_DEV_REQ_UPSIZE;
230error:
231 /*
232 * We were unable to copy the op data to the client. Put the op back in
233 * list. If client has crashed, the op will be purged later when the
234 * device is released.
235 */
236 gossip_err("orangefs: Failed to copy data to user space\n");
8bb8aefd 237 spin_lock(&orangefs_request_list_lock);
24c8d080
MB
238 spin_lock(&cur_op->lock);
239 set_op_state_waiting(cur_op);
8bb8aefd
YL
240 orangefs_devreq_remove_op(cur_op->tag);
241 list_add(&cur_op->list, &orangefs_request_list);
24c8d080 242 spin_unlock(&cur_op->lock);
8bb8aefd 243 spin_unlock(&orangefs_request_list_lock);
24c8d080 244 return -EFAULT;
5db11c21
MM
245}
246
97f10027
MM
247/*
248 * Function for writev() callers into the device. Readdir related
249 * operations have an extra iovec containing info about objects
250 * contained in directories.
251 */
8bb8aefd 252static ssize_t orangefs_devreq_writev(struct file *file,
5db11c21
MM
253 const struct iovec *iov,
254 size_t count,
255 loff_t *offset)
256{
8bb8aefd 257 struct orangefs_kernel_op_s *op = NULL;
5db11c21
MM
258 void *buffer = NULL;
259 void *ptr = NULL;
260 unsigned long i = 0;
97f10027
MM
261 int num_remaining = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
262 int ret = 0;
263 /* num elements in iovec without trailer */
264 int notrailer_count = 4;
265 /*
266 * If there's a trailer, its iov index will be equal to
267 * notrailer_count.
268 */
269 int trailer_index = notrailer_count;
5db11c21 270 int payload_size = 0;
97f10027 271 int returned_downcall_size = 0;
5db11c21
MM
272 __s32 magic = 0;
273 __s32 proto_ver = 0;
274 __u64 tag = 0;
275 ssize_t total_returned_size = 0;
276
97f10027
MM
277 /*
278 * There will always be at least notrailer_count iovecs, and
279 * when there's a trailer, one more than notrailer_count. Check
280 * count's sanity.
281 */
5db11c21 282 if (count != notrailer_count && count != (notrailer_count + 1)) {
97f10027
MM
283 gossip_err("%s: count:%zu: notrailer_count :%d:\n",
284 __func__,
5db11c21
MM
285 count,
286 notrailer_count);
287 return -EPROTO;
288 }
97f10027
MM
289
290
291 /* Copy the non-trailer iovec data into a device request buffer. */
5db11c21 292 buffer = dev_req_alloc();
97f10027
MM
293 if (!buffer) {
294 gossip_err("%s: dev_req_alloc failed.\n", __func__);
5db11c21 295 return -ENOMEM;
97f10027 296 }
5db11c21 297 ptr = buffer;
5db11c21
MM
298 for (i = 0; i < notrailer_count; i++) {
299 if (iov[i].iov_len > num_remaining) {
300 gossip_err
301 ("writev error: Freeing buffer and returning\n");
302 dev_req_release(buffer);
303 return -EMSGSIZE;
304 }
305 ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
306 if (ret) {
307 gossip_err("Failed to copy data from user space\n");
308 dev_req_release(buffer);
309 return -EIO;
310 }
311 num_remaining -= iov[i].iov_len;
312 ptr += iov[i].iov_len;
313 payload_size += iov[i].iov_len;
314 }
315 total_returned_size = payload_size;
316
317 /* these elements are currently 8 byte aligned (8 bytes for (version +
318 * magic) 8 bytes for tag). If you add another element, either
319 * make it 8 bytes big, or use get_unaligned when asigning.
320 */
321 ptr = buffer;
97f10027 322 proto_ver = *((__s32 *) ptr); /* unused */
5db11c21
MM
323 ptr += sizeof(__s32);
324
325 magic = *((__s32 *) ptr);
326 ptr += sizeof(__s32);
327
328 tag = *((__u64 *) ptr);
329 ptr += sizeof(__u64);
330
8bb8aefd 331 if (magic != ORANGEFS_DEVREQ_MAGIC) {
5db11c21
MM
332 gossip_err("Error: Device magic number does not match.\n");
333 dev_req_release(buffer);
334 return -EPROTO;
335 }
336
8bb8aefd 337 op = orangefs_devreq_remove_op(tag);
5db11c21
MM
338 if (op) {
339 /* Increase ref count! */
340 get_op(op);
97f10027
MM
341
342 /* calculate the size of the returned downcall. */
343 returned_downcall_size =
344 payload_size - (2 * sizeof(__s32) + sizeof(__u64));
345
346 /* copy the passed in downcall into the op */
347 if (returned_downcall_size ==
348 sizeof(struct orangefs_downcall_s)) {
5db11c21
MM
349 memcpy(&op->downcall,
350 ptr,
8bb8aefd 351 sizeof(struct orangefs_downcall_s));
97f10027
MM
352 } else {
353 gossip_err("%s: returned downcall size:%d: \n",
354 __func__,
355 returned_downcall_size);
356 dev_req_release(buffer);
357 put_op(op);
358 return -EMSGSIZE;
359 }
360
361 /* Don't tolerate an unexpected trailer iovec. */
362 if ((op->downcall.trailer_size == 0) &&
363 (count != notrailer_count)) {
364 gossip_err("%s: unexpected trailer iovec.\n",
365 __func__);
366 dev_req_release(buffer);
367 put_op(op);
368 return -EPROTO;
369 }
370
371 /* Don't consider the trailer if there's a bad status. */
372 if (op->downcall.status != 0)
373 goto no_trailer;
374
375 /* get the trailer if there is one. */
376 if (op->downcall.trailer_size == 0)
377 goto no_trailer;
378
379 gossip_debug(GOSSIP_DEV_DEBUG,
380 "%s: op->downcall.trailer_size %lld\n",
381 __func__,
382 op->downcall.trailer_size);
5db11c21 383
97f10027
MM
384 /*
385 * Bail if we think think there should be a trailer, but
386 * there's no iovec for it.
5db11c21 387 */
97f10027
MM
388 if (count != (notrailer_count + 1)) {
389 gossip_err("%s: trailer_size:%lld: count:%zu:\n",
390 __func__,
391 op->downcall.trailer_size,
392 count);
393 dev_req_release(buffer);
394 put_op(op);
395 return -EPROTO;
396 }
397
398 /* Verify that trailer_size is accurate. */
399 if (op->downcall.trailer_size != iov[trailer_index].iov_len) {
400 gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n",
401 __func__,
402 op->downcall.trailer_size,
403 iov[trailer_index].iov_len);
404 dev_req_release(buffer);
405 put_op(op);
406 return -EMSGSIZE;
407 }
408
409 total_returned_size += iov[trailer_index].iov_len;
410
411 /*
412 * Allocate a buffer, copy the trailer bytes into it and
413 * attach it to the downcall.
414 */
415 op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len);
416 if (op->downcall.trailer_buf != NULL) {
417 gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
418 op->downcall.trailer_buf);
419 ret = copy_from_user(op->downcall.trailer_buf,
420 iov[trailer_index].iov_base,
421 iov[trailer_index].iov_len);
422 if (ret) {
423 gossip_err("%s: Failed to copy trailer.\n",
424 __func__);
5db11c21 425 dev_req_release(buffer);
97f10027
MM
426 gossip_debug(GOSSIP_DEV_DEBUG,
427 "vfree: %p\n",
5db11c21 428 op->downcall.trailer_buf);
97f10027
MM
429 vfree(op->downcall.trailer_buf);
430 op->downcall.trailer_buf = NULL;
431 put_op(op);
432 return -EIO;
5db11c21 433 }
97f10027 434 } else {
97f10027
MM
435 gossip_err("writev: could not vmalloc for trailer!\n");
436 dev_req_release(buffer);
437 put_op(op);
438 return -ENOMEM;
5db11c21
MM
439 }
440
97f10027
MM
441no_trailer:
442
443 /* if this operation is an I/O operation we need to wait
5db11c21
MM
444 * for all data to be copied before we can return to avoid
445 * buffer corruption and races that can pull the buffers
446 * out from under us.
447 *
448 * Essentially we're synchronizing with other parts of the
449 * vfs implicitly by not allowing the user space
450 * application reading/writing this device to return until
451 * the buffers are done being used.
452 */
97f10027 453 if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) {
5db11c21 454 int timed_out = 0;
ce6c414e 455 DEFINE_WAIT(wait_entry);
5db11c21 456
97f10027
MM
457 /*
458 * tell the vfs op waiting on a waitqueue
5db11c21
MM
459 * that this op is done
460 */
461 spin_lock(&op->lock);
462 set_op_state_serviced(op);
463 spin_unlock(&op->lock);
464
5db11c21
MM
465 wake_up_interruptible(&op->waitq);
466
467 while (1) {
5db11c21 468 spin_lock(&op->lock);
ce6c414e
MM
469 prepare_to_wait_exclusive(
470 &op->io_completion_waitq,
471 &wait_entry,
472 TASK_INTERRUPTIBLE);
5db11c21
MM
473 if (op->io_completed) {
474 spin_unlock(&op->lock);
475 break;
476 }
477 spin_unlock(&op->lock);
478
479 if (!signal_pending(current)) {
480 int timeout =
481 MSECS_TO_JIFFIES(1000 *
482 op_timeout_secs);
483 if (!schedule_timeout(timeout)) {
97f10027
MM
484 gossip_debug(GOSSIP_DEV_DEBUG,
485 "%s: timed out.\n",
486 __func__);
5db11c21
MM
487 timed_out = 1;
488 break;
489 }
490 continue;
491 }
492
97f10027
MM
493 gossip_debug(GOSSIP_DEV_DEBUG,
494 "%s: signal on I/O wait, aborting\n",
495 __func__);
5db11c21
MM
496 break;
497 }
498
ce6c414e
MM
499 spin_lock(&op->lock);
500 finish_wait(&op->io_completion_waitq, &wait_entry);
501 spin_unlock(&op->lock);
5db11c21
MM
502
503 /* NOTE: for I/O operations we handle releasing the op
504 * object except in the case of timeout. the reason we
505 * can't free the op in timeout cases is that the op
506 * service logic in the vfs retries operations using
507 * the same op ptr, thus it can't be freed.
508 */
509 if (!timed_out)
510 op_release(op);
511 } else {
512
513 /*
514 * tell the vfs op waiting on a waitqueue that
515 * this op is done
516 */
517 spin_lock(&op->lock);
518 set_op_state_serviced(op);
519 spin_unlock(&op->lock);
520 /*
54804949
MM
521 * for every other operation (i.e. non-I/O), we need to
522 * wake up the callers for downcall completion
523 * notification
5db11c21
MM
524 */
525 wake_up_interruptible(&op->waitq);
526 }
527 } else {
528 /* ignore downcalls that we're not interested in */
529 gossip_debug(GOSSIP_DEV_DEBUG,
530 "WARNING: No one's waiting for tag %llu\n",
531 llu(tag));
532 }
97f10027 533 /* put_op? */
5db11c21
MM
534 dev_req_release(buffer);
535
536 return total_returned_size;
537}
538
8bb8aefd 539static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
5db11c21
MM
540 struct iov_iter *iter)
541{
8bb8aefd 542 return orangefs_devreq_writev(iocb->ki_filp,
5db11c21
MM
543 iter->iov,
544 iter->nr_segs,
545 &iocb->ki_pos);
546}
547
548/* Returns whether any FS are still pending remounted */
549static int mark_all_pending_mounts(void)
550{
551 int unmounted = 1;
8bb8aefd 552 struct orangefs_sb_info_s *orangefs_sb = NULL;
5db11c21 553
8bb8aefd
YL
554 spin_lock(&orangefs_superblocks_lock);
555 list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
5db11c21 556 /* All of these file system require a remount */
8bb8aefd 557 orangefs_sb->mount_pending = 1;
5db11c21
MM
558 unmounted = 0;
559 }
8bb8aefd 560 spin_unlock(&orangefs_superblocks_lock);
5db11c21
MM
561 return unmounted;
562}
563
564/*
565 * Determine if a given file system needs to be remounted or not
566 * Returns -1 on error
567 * 0 if already mounted
568 * 1 if needs remount
569 */
570int fs_mount_pending(__s32 fsid)
571{
572 int mount_pending = -1;
8bb8aefd 573 struct orangefs_sb_info_s *orangefs_sb = NULL;
5db11c21 574
8bb8aefd
YL
575 spin_lock(&orangefs_superblocks_lock);
576 list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
577 if (orangefs_sb->fs_id == fsid) {
578 mount_pending = orangefs_sb->mount_pending;
5db11c21
MM
579 break;
580 }
581 }
8bb8aefd 582 spin_unlock(&orangefs_superblocks_lock);
5db11c21
MM
583 return mount_pending;
584}
585
586/*
587 * NOTE: gets called when the last reference to this device is dropped.
588 * Using the open_access_count variable, we enforce a reference count
589 * on this file so that it can be opened by only one process at a time.
590 * the devreq_mutex is used to make sure all i/o has completed
8bb8aefd 591 * before we call orangefs_bufmap_finalize, and similar such tricky
5db11c21
MM
592 * situations
593 */
8bb8aefd 594static int orangefs_devreq_release(struct inode *inode, struct file *file)
5db11c21
MM
595{
596 int unmounted = 0;
597
598 gossip_debug(GOSSIP_DEV_DEBUG,
599 "%s:pvfs2-client-core: exiting, closing device\n",
600 __func__);
601
602 mutex_lock(&devreq_mutex);
90d26aa8
MB
603 if (get_bufmap_init())
604 orangefs_bufmap_finalize();
5db11c21
MM
605
606 open_access_count--;
607
608 unmounted = mark_all_pending_mounts();
8bb8aefd 609 gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
5db11c21
MM
610 (unmounted ? "UNMOUNTED" : "MOUNTED"));
611 mutex_unlock(&devreq_mutex);
612
613 /*
614 * Walk through the list of ops in the request list, mark them
615 * as purged and wake them up.
616 */
617 purge_waiting_ops();
618 /*
619 * Walk through the hash table of in progress operations; mark
620 * them as purged and wake them up
621 */
622 purge_inprogress_ops();
623 gossip_debug(GOSSIP_DEV_DEBUG,
624 "pvfs2-client-core: device close complete\n");
625 return 0;
626}
627
628int is_daemon_in_service(void)
629{
630 int in_service;
631
632 /*
633 * What this function does is checks if client-core is alive
634 * based on the access count we maintain on the device.
635 */
636 mutex_lock(&devreq_mutex);
637 in_service = open_access_count == 1 ? 0 : -EIO;
638 mutex_unlock(&devreq_mutex);
639 return in_service;
640}
641
642static inline long check_ioctl_command(unsigned int command)
643{
644 /* Check for valid ioctl codes */
8bb8aefd 645 if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
5db11c21
MM
646 gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
647 command,
648 _IOC_TYPE(command),
8bb8aefd 649 ORANGEFS_DEV_MAGIC);
5db11c21
MM
650 return -EINVAL;
651 }
652 /* and valid ioctl commands */
8bb8aefd 653 if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
5db11c21 654 gossip_err("Invalid ioctl command number [%d >= %d]\n",
8bb8aefd 655 _IOC_NR(command), ORANGEFS_DEV_MAXNR);
5db11c21
MM
656 return -ENOIOCTLCMD;
657 }
658 return 0;
659}
660
661static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
662{
8bb8aefd 663 static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
5db11c21
MM
664 static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE;
665 static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
8bb8aefd 666 struct ORANGEFS_dev_map_desc user_desc;
5db11c21
MM
667 int ret = 0;
668 struct dev_mask_info_s mask_info = { 0 };
669 struct dev_mask2_info_s mask2_info = { 0, 0 };
670 int upstream_kmod = 1;
671 struct list_head *tmp = NULL;
8bb8aefd 672 struct orangefs_sb_info_s *orangefs_sb = NULL;
5db11c21
MM
673
674 /* mtmoore: add locking here */
675
676 switch (command) {
8bb8aefd 677 case ORANGEFS_DEV_GET_MAGIC:
5db11c21
MM
678 return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
679 -EIO :
680 0);
8bb8aefd 681 case ORANGEFS_DEV_GET_MAX_UPSIZE:
5db11c21
MM
682 return ((put_user(max_up_size,
683 (__s32 __user *) arg) == -EFAULT) ?
684 -EIO :
685 0);
8bb8aefd 686 case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
5db11c21
MM
687 return ((put_user(max_down_size,
688 (__s32 __user *) arg) == -EFAULT) ?
689 -EIO :
690 0);
8bb8aefd 691 case ORANGEFS_DEV_MAP:
5db11c21 692 ret = copy_from_user(&user_desc,
8bb8aefd 693 (struct ORANGEFS_dev_map_desc __user *)
5db11c21 694 arg,
8bb8aefd 695 sizeof(struct ORANGEFS_dev_map_desc));
90d26aa8
MB
696 if (get_bufmap_init()) {
697 return -EINVAL;
698 } else {
699 return ret ?
700 -EIO :
701 orangefs_bufmap_initialize(&user_desc);
702 }
8bb8aefd 703 case ORANGEFS_DEV_REMOUNT_ALL:
5db11c21 704 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
705 "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
706 __func__);
5db11c21
MM
707
708 /*
8bb8aefd 709 * remount all mounted orangefs volumes to regain the lost
5db11c21
MM
710 * dynamic mount tables (if any) -- NOTE: this is done
711 * without keeping the superblock list locked due to the
712 * upcall/downcall waiting. also, the request semaphore is
713 * used to ensure that no operations will be serviced until
714 * all of the remounts are serviced (to avoid ops between
715 * mounts to fail)
716 */
717 ret = mutex_lock_interruptible(&request_mutex);
718 if (ret < 0)
719 return ret;
720 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
721 "%s: priority remount in progress\n",
722 __func__);
8bb8aefd
YL
723 list_for_each(tmp, &orangefs_superblocks) {
724 orangefs_sb =
97f10027
MM
725 list_entry(tmp,
726 struct orangefs_sb_info_s,
727 list);
8bb8aefd 728 if (orangefs_sb && (orangefs_sb->sb)) {
5db11c21 729 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
730 "%s: Remounting SB %p\n",
731 __func__,
8bb8aefd 732 orangefs_sb);
5db11c21 733
8bb8aefd 734 ret = orangefs_remount(orangefs_sb->sb);
5db11c21
MM
735 if (ret) {
736 gossip_debug(GOSSIP_DEV_DEBUG,
737 "SB %p remount failed\n",
8bb8aefd 738 orangefs_sb);
97f10027 739 break;
5db11c21
MM
740 }
741 }
742 }
743 gossip_debug(GOSSIP_DEV_DEBUG,
97f10027
MM
744 "%s: priority remount complete\n",
745 __func__);
5db11c21
MM
746 mutex_unlock(&request_mutex);
747 return ret;
748
8bb8aefd 749 case ORANGEFS_DEV_UPSTREAM:
5db11c21
MM
750 ret = copy_to_user((void __user *)arg,
751 &upstream_kmod,
752 sizeof(upstream_kmod));
753
754 if (ret != 0)
755 return -EIO;
756 else
757 return ret;
758
8bb8aefd 759 case ORANGEFS_DEV_CLIENT_MASK:
5db11c21
MM
760 ret = copy_from_user(&mask2_info,
761 (void __user *)arg,
762 sizeof(struct dev_mask2_info_s));
763
764 if (ret != 0)
765 return -EIO;
766
767 client_debug_mask.mask1 = mask2_info.mask1_value;
768 client_debug_mask.mask2 = mask2_info.mask2_value;
769
770 pr_info("%s: client debug mask has been been received "
771 ":%llx: :%llx:\n",
772 __func__,
773 (unsigned long long)client_debug_mask.mask1,
774 (unsigned long long)client_debug_mask.mask2);
775
776 return ret;
777
8bb8aefd 778 case ORANGEFS_DEV_CLIENT_STRING:
5db11c21
MM
779 ret = copy_from_user(&client_debug_array_string,
780 (void __user *)arg,
8bb8aefd 781 ORANGEFS_MAX_DEBUG_STRING_LEN);
5db11c21 782 if (ret != 0) {
97f10027 783 pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
5db11c21
MM
784 __func__);
785 return -EIO;
786 }
787
97f10027 788 pr_info("%s: client debug array string has been received.\n",
5db11c21
MM
789 __func__);
790
791 if (!help_string_initialized) {
792
793 /* Free the "we don't know yet" default string... */
794 kfree(debug_help_string);
795
796 /* build a proper debug help string */
797 if (orangefs_prepare_debugfs_help_string(0)) {
97f10027 798 gossip_err("%s: no debug help string \n",
5db11c21
MM
799 __func__);
800 return -EIO;
801 }
802
803 /* Replace the boilerplate boot-time debug-help file. */
804 debugfs_remove(help_file_dentry);
805
806 help_file_dentry =
807 debugfs_create_file(
808 ORANGEFS_KMOD_DEBUG_HELP_FILE,
809 0444,
810 debug_dir,
811 debug_help_string,
812 &debug_help_fops);
813
814 if (!help_file_dentry) {
815 gossip_err("%s: debugfs_create_file failed for"
816 " :%s:!\n",
817 __func__,
818 ORANGEFS_KMOD_DEBUG_HELP_FILE);
819 return -EIO;
820 }
821 }
822
823 debug_mask_to_string(&client_debug_mask, 1);
824
825 debugfs_remove(client_debug_dentry);
826
8bb8aefd 827 orangefs_client_debug_init();
5db11c21
MM
828
829 help_string_initialized++;
830
831 return ret;
832
8bb8aefd 833 case ORANGEFS_DEV_DEBUG:
5db11c21
MM
834 ret = copy_from_user(&mask_info,
835 (void __user *)arg,
836 sizeof(mask_info));
837
838 if (ret != 0)
839 return -EIO;
840
841 if (mask_info.mask_type == KERNEL_MASK) {
842 if ((mask_info.mask_value == 0)
843 && (kernel_mask_set_mod_init)) {
844 /*
845 * the kernel debug mask was set when the
846 * kernel module was loaded; don't override
847 * it if the client-core was started without
8bb8aefd 848 * a value for ORANGEFS_KMODMASK.
5db11c21
MM
849 */
850 return 0;
851 }
852 debug_mask_to_string(&mask_info.mask_value,
853 mask_info.mask_type);
854 gossip_debug_mask = mask_info.mask_value;
97f10027 855 pr_info("%s: kernel debug mask has been modified to "
5db11c21 856 ":%s: :%llx:\n",
97f10027 857 __func__,
5db11c21
MM
858 kernel_debug_string,
859 (unsigned long long)gossip_debug_mask);
860 } else if (mask_info.mask_type == CLIENT_MASK) {
861 debug_mask_to_string(&mask_info.mask_value,
862 mask_info.mask_type);
97f10027 863 pr_info("%s: client debug mask has been modified to"
5db11c21 864 ":%s: :%llx:\n",
97f10027 865 __func__,
5db11c21
MM
866 client_debug_string,
867 llu(mask_info.mask_value));
868 } else {
869 gossip_lerr("Invalid mask type....\n");
870 return -EINVAL;
871 }
872
873 return ret;
874
875 default:
876 return -ENOIOCTLCMD;
877 }
878 return -ENOIOCTLCMD;
879}
880
/*
 * unlocked_ioctl() handler: validates the command word with
 * check_ioctl_command() and, if it is well formed, hands it to
 * dispatch_ioctl_command(). The result is narrowed to int before being
 * returned.
 */
static long orangefs_devreq_ioctl(struct file *file,
				  unsigned int command, unsigned long arg)
{
	long rc = check_ioctl_command(command);

	/* Only properly constructed commands are dispatched. */
	if (rc >= 0)
		rc = dispatch_ioctl_command(command, arg);

	return (int)rc;
}
893
894#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
895
8bb8aefd
YL
896/* Compat structure for the ORANGEFS_DEV_MAP ioctl */
897struct ORANGEFS_dev_map_desc32 {
5db11c21
MM
898 compat_uptr_t ptr;
899 __s32 total_size;
900 __s32 size;
901 __s32 count;
902};
903
904static unsigned long translate_dev_map26(unsigned long args, long *error)
905{
8bb8aefd 906 struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
5db11c21
MM
907 /*
908 * Depending on the architecture, allocate some space on the
909 * user-call-stack based on our expected layout.
910 */
8bb8aefd 911 struct ORANGEFS_dev_map_desc __user *p =
5db11c21 912 compat_alloc_user_space(sizeof(*p));
84d02150 913 compat_uptr_t addr;
5db11c21
MM
914
915 *error = 0;
916 /* get the ptr from the 32 bit user-space */
917 if (get_user(addr, &p32->ptr))
918 goto err;
919 /* try to put that into a 64-bit layout */
920 if (put_user(compat_ptr(addr), &p->ptr))
921 goto err;
922 /* copy the remaining fields */
923 if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
924 goto err;
925 if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
926 goto err;
927 if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
928 goto err;
929 return (unsigned long)p;
930err:
931 *error = -EFAULT;
932 return 0;
933}
934
935/*
936 * 32 bit user-space apps' ioctl handlers when kernel modules
937 * is compiled as a 64 bit one
938 */
8bb8aefd 939static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
5db11c21
MM
940 unsigned long args)
941{
942 long ret;
943 unsigned long arg = args;
944
945 /* Check for properly constructed commands */
946 ret = check_ioctl_command(cmd);
947 if (ret < 0)
948 return ret;
8bb8aefd 949 if (cmd == ORANGEFS_DEV_MAP) {
5db11c21
MM
950 /*
951 * convert the arguments to what we expect internally
952 * in kernel space
953 */
954 arg = translate_dev_map26(args, &ret);
955 if (ret < 0) {
956 gossip_err("Could not translate dev map\n");
957 return ret;
958 }
959 }
960 /* no other ioctl requires translation */
961 return dispatch_ioctl_command(cmd, arg);
962}
963
2c590d5f
MM
964#endif /* CONFIG_COMPAT is in .config */
965
966/*
967 * The following two ioctl32 functions had been refactored into the above
968 * CONFIG_COMPAT ifdef, but that was an over simplification that was
969 * not noticed until we tried to compile on power pc...
970 */
#if !defined(CONFIG_COMPAT) || !defined(HAVE_REGISTER_IOCTL32_CONVERSION)
/* No ioctl32 conversion registration is needed here; always succeeds. */
static int orangefs_ioctl32_init(void)
{
	return 0;
}

/* Counterpart of orangefs_ioctl32_init(); nothing to undo. */
static void orangefs_ioctl32_cleanup(void)
{
}
#endif
5db11c21
MM
982
983/* the assigned character device major number */
8bb8aefd 984static int orangefs_dev_major;
5db11c21
MM
985
986/*
8bb8aefd 987 * Initialize orangefs device specific state:
5db11c21
MM
988 * Must be called at module load time only
989 */
8bb8aefd 990int orangefs_dev_init(void)
5db11c21
MM
991{
992 int ret;
993
994 /* register the ioctl32 sub-system */
8bb8aefd 995 ret = orangefs_ioctl32_init();
5db11c21
MM
996 if (ret < 0)
997 return ret;
998
8bb8aefd
YL
999 /* register orangefs-req device */
1000 orangefs_dev_major = register_chrdev(0,
1001 ORANGEFS_REQDEVICE_NAME,
1002 &orangefs_devreq_file_operations);
1003 if (orangefs_dev_major < 0) {
5db11c21
MM
1004 gossip_debug(GOSSIP_DEV_DEBUG,
1005 "Failed to register /dev/%s (error %d)\n",
8bb8aefd
YL
1006 ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
1007 orangefs_ioctl32_cleanup();
1008 return orangefs_dev_major;
5db11c21
MM
1009 }
1010
1011 gossip_debug(GOSSIP_DEV_DEBUG,
1012 "*** /dev/%s character device registered ***\n",
8bb8aefd 1013 ORANGEFS_REQDEVICE_NAME);
5db11c21 1014 gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
8bb8aefd 1015 ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
5db11c21
MM
1016 return 0;
1017}
1018
8bb8aefd 1019void orangefs_dev_cleanup(void)
5db11c21 1020{
8bb8aefd 1021 unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
5db11c21
MM
1022 gossip_debug(GOSSIP_DEV_DEBUG,
1023 "*** /dev/%s character device unregistered ***\n",
8bb8aefd 1024 ORANGEFS_REQDEVICE_NAME);
5db11c21 1025 /* unregister the ioctl32 sub-system */
8bb8aefd 1026 orangefs_ioctl32_cleanup();
5db11c21
MM
1027}
1028
8bb8aefd 1029static unsigned int orangefs_devreq_poll(struct file *file,
5db11c21
MM
1030 struct poll_table_struct *poll_table)
1031{
1032 int poll_revent_mask = 0;
1033
1034 if (open_access_count == 1) {
8bb8aefd 1035 poll_wait(file, &orangefs_request_list_waitq, poll_table);
5db11c21 1036
8bb8aefd
YL
1037 spin_lock(&orangefs_request_list_lock);
1038 if (!list_empty(&orangefs_request_list))
5db11c21 1039 poll_revent_mask |= POLL_IN;
8bb8aefd 1040 spin_unlock(&orangefs_request_list_lock);
5db11c21
MM
1041 }
1042 return poll_revent_mask;
1043}
1044
8bb8aefd 1045const struct file_operations orangefs_devreq_file_operations = {
5db11c21 1046 .owner = THIS_MODULE,
8bb8aefd
YL
1047 .read = orangefs_devreq_read,
1048 .write_iter = orangefs_devreq_write_iter,
1049 .open = orangefs_devreq_open,
1050 .release = orangefs_devreq_release,
1051 .unlocked_ioctl = orangefs_devreq_ioctl,
5db11c21
MM
1052
1053#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
8bb8aefd 1054 .compat_ioctl = orangefs_devreq_compat_ioctl,
5db11c21 1055#endif
8bb8aefd 1056 .poll = orangefs_devreq_poll
5db11c21 1057};