Orangefs: kernel client update 1.
fs/orangefs/devpvfs2-req.c
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * Changes by Acxiom Corporation to add protocol version to kernel
 * communication, Copyright Acxiom Corporation, 2005.
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
#include "pvfs2-kernel.h"
#include "pvfs2-dev-proto.h"
#include "pvfs2-bufmap.h"

#include <linux/debugfs.h>
#include <linux/slab.h>

/* this file implements the /dev/pvfs2-req device node */

static int open_access_count;

#define DUMP_DEVICE_ERROR() \
do { \
	gossip_err("*****************************************************\n");\
	gossip_err("PVFS2 Device Error: You cannot open the device file "); \
	gossip_err("\n/dev/%s more than once. Please make sure that\nthere " \
		   "are no ", PVFS2_REQDEVICE_NAME); \
	gossip_err("instances of a program using this device\ncurrently " \
		   "running. (You must verify this!)\n"); \
	gossip_err("For example, you can use the lsof program as follows:\n");\
	gossip_err("'lsof | grep %s' (run this as root)\n", \
		   PVFS2_REQDEVICE_NAME); \
	gossip_err(" open_access_count = %d\n", open_access_count); \
	gossip_err("*****************************************************\n");\
} while (0)

static int hash_func(__u64 tag, int table_size)
{
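	/*
	 * do_div() divides the 64-bit tag in place and returns the
	 * remainder, so this evaluates to tag % table_size.
	 */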
	return do_div(tag, (unsigned int)table_size);
}

static void pvfs2_devreq_add_op(struct pvfs2_kernel_op_s *op)
{
	int index = hash_func(op->tag, hash_table_size);

	spin_lock(&htable_ops_in_progress_lock);
	list_add_tail(&op->list, &htable_ops_in_progress[index]);
	spin_unlock(&htable_ops_in_progress_lock);
}

static struct pvfs2_kernel_op_s *pvfs2_devreq_remove_op(__u64 tag)
{
	struct pvfs2_kernel_op_s *op, *next;
	int index;

	index = hash_func(tag, hash_table_size);

	spin_lock(&htable_ops_in_progress_lock);
	list_for_each_entry_safe(op,
				 next,
				 &htable_ops_in_progress[index],
				 list) {
		if (op->tag == tag) {
			list_del(&op->list);
			spin_unlock(&htable_ops_in_progress_lock);
			return op;
		}
	}

	spin_unlock(&htable_ops_in_progress_lock);
	return NULL;
}

static int pvfs2_devreq_open(struct inode *inode, struct file *file)
{
	int ret = -EINVAL;

	if (!(file->f_flags & O_NONBLOCK)) {
		gossip_err("pvfs2: device cannot be opened in blocking mode\n");
		goto out;
	}
	ret = -EACCES;
	gossip_debug(GOSSIP_DEV_DEBUG, "pvfs2-client-core: opening device\n");
	mutex_lock(&devreq_mutex);

	if (open_access_count == 0) {
		ret = generic_file_open(inode, file);
		if (ret == 0)
			open_access_count++;
	} else {
		DUMP_DEVICE_ERROR();
	}
	mutex_unlock(&devreq_mutex);

out:

	gossip_debug(GOSSIP_DEV_DEBUG,
		     "pvfs2-client-core: open device complete (ret = %d)\n",
		     ret);
	return ret;
}

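/*
 * Ops with op_linger == 1 go out to user space in a single read.
 * Ops with op_linger == 2 carry a trailer and take two reads: the
 * first delivers the upcall itself, the second delivers the trailer
 * buffer.
 */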
static ssize_t pvfs2_devreq_read(struct file *file,
				 char __user *buf,
				 size_t count, loff_t *offset)
{
	int ret = 0;
	ssize_t len = 0;
	struct pvfs2_kernel_op_s *cur_op = NULL;
	static __s32 magic = PVFS2_DEVREQ_MAGIC;
	__s32 proto_ver = PVFS_KERNEL_PROTO_VERSION;

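	/*
	 * Each record handed to user space is laid out as:
	 *
	 *   __s32 proto_ver | __s32 magic | __u64 tag | struct pvfs2_upcall_s
	 *
	 * The nested copy_to_user() calls below emit exactly this layout;
	 * the tag is what the matching downcall must echo back via writev().
	 */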
	if (!(file->f_flags & O_NONBLOCK)) {
		/* We do not support blocking reads/opens any more */
		gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n");
		return -EINVAL;
	} else {
		struct pvfs2_kernel_op_s *op = NULL, *temp = NULL;
		/* get next op (if any) from top of list */
		spin_lock(&pvfs2_request_list_lock);
		list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) {
			__s32 fsid = fsid_of_op(op);
			/*
			 * Check if this op's fsid is known and needs
			 * remounting
			 */
			if (fsid != PVFS_FS_ID_NULL &&
			    fs_mount_pending(fsid) == 1) {
				gossip_debug(GOSSIP_DEV_DEBUG,
					     "Skipping op tag %llu %s\n",
					     llu(op->tag),
					     get_opname_string(op));
				continue;
			} else {
				/*
				 * op does not belong to any particular fsid,
				 * or its fsid is already mounted; let it
				 * through
				 */
				cur_op = op;
				spin_lock(&cur_op->lock);
				list_del(&cur_op->list);
				cur_op->op_linger_tmp--;
				/*
				 * if there is a trailer, re-add it to
				 * the request list.
				 */
				if (cur_op->op_linger == 2 &&
				    cur_op->op_linger_tmp == 1) {
					if (cur_op->upcall.trailer_size <= 0 ||
					    cur_op->upcall.trailer_buf == NULL)
						gossip_err("BUG: trailer_size is %ld and trailer buf is %p\n",
							   (long)cur_op->upcall.trailer_size,
							   cur_op->upcall.trailer_buf);
					/* re-add it to the head of the list */
					list_add(&cur_op->list,
						 &pvfs2_request_list);
				}
				spin_unlock(&cur_op->lock);
				break;
			}
		}
		spin_unlock(&pvfs2_request_list_lock);
	}

	if (cur_op) {
		spin_lock(&cur_op->lock);

		gossip_debug(GOSSIP_DEV_DEBUG,
			     "client-core: reading op tag %llu %s\n",
			     llu(cur_op->tag), get_opname_string(cur_op));
		if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
			if (cur_op->op_linger == 1)
				gossip_err("WARNING: Current op already queued...skipping\n");
		} else if (cur_op->op_linger == 1 ||
			   (cur_op->op_linger == 2 &&
			    cur_op->op_linger_tmp == 0)) {
			/*
			 * atomically move the operation to the
			 * htable_ops_in_progress
			 */
			set_op_state_inprogress(cur_op);
			pvfs2_devreq_add_op(cur_op);
		}

		spin_unlock(&cur_op->lock);

		/*
		 * 2 cases:
		 * a) OPs with no trailers
		 * b) OPs with trailers, Stage 1
		 * Either way push the upcall out
		 */
		if (cur_op->op_linger == 1 ||
		    (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 1)) {
			len = MAX_ALIGNED_DEV_REQ_UPSIZE;
			if ((size_t) len <= count) {
				ret = copy_to_user(buf,
						   &proto_ver,
						   sizeof(__s32));
				if (ret == 0) {
					ret = copy_to_user(buf + sizeof(__s32),
							   &magic,
							   sizeof(__s32));
					if (ret == 0) {
						ret = copy_to_user(buf + 2 * sizeof(__s32),
								   &cur_op->tag,
								   sizeof(__u64));
						if (ret == 0) {
							ret = copy_to_user(
								buf +
								2 *
								sizeof(__s32) +
								sizeof(__u64),
								&cur_op->upcall,
								sizeof(struct pvfs2_upcall_s));
						}
					}
				}

				if (ret) {
					gossip_err("Failed to copy data to user space\n");
					len = -EFAULT;
				}
			} else {
				gossip_err("Read buffer is too small to hold the upcall (%ld as opposed to %ld)\n",
					   (long)count,
					   (long)len);
				len = -EIO;
			}
		}
		/* Stage 2: Push the trailer out */
		else if (cur_op->op_linger == 2 && cur_op->op_linger_tmp == 0) {
			len = cur_op->upcall.trailer_size;
			if ((size_t) len <= count) {
				ret = copy_to_user(buf,
						   cur_op->upcall.trailer_buf,
						   len);
				if (ret) {
					gossip_err("Failed to copy trailer to user space\n");
					len = -EFAULT;
				}
			} else {
				gossip_err("Read buffer for trailer is too small (%ld as opposed to %ld)\n",
					   (long)count,
					   (long)len);
				len = -EIO;
			}
		} else {
			gossip_err("cur_op: %p (op_linger %d), (op_linger_tmp %d), erroneous request list?\n",
				   cur_op,
				   cur_op->op_linger,
				   cur_op->op_linger_tmp);
			len = 0;
		}
	} else if (file->f_flags & O_NONBLOCK) {
		/*
		 * if in non-blocking mode, return EAGAIN since no requests are
		 * ready yet
		 */
		len = -EAGAIN;
	}
	return len;
}

/* Function for writev() callers into the device */
static ssize_t pvfs2_devreq_writev(struct file *file,
				   const struct iovec *iov,
				   size_t count,
				   loff_t *offset)
{
	struct pvfs2_kernel_op_s *op = NULL;
	void *buffer = NULL;
	void *ptr = NULL;
	unsigned long i = 0;
	static int max_downsize = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
	int ret = 0, num_remaining = max_downsize;
	int notrailer_count = 4; /* num elements in iovec without trailer */
	int payload_size = 0;
	__s32 magic = 0;
	__s32 proto_ver = 0;
	__u64 tag = 0;
	ssize_t total_returned_size = 0;

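	/*
	 * A well-formed downcall is written as four iovecs that together
	 * form [proto_ver][magic][tag][downcall], optionally followed by
	 * a fifth iovec carrying trailer data; anything else is a
	 * protocol error.
	 */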
	/* Either there is a trailer or there isn't */
	if (count != notrailer_count && count != (notrailer_count + 1)) {
		gossip_err("Error: Number of iov vectors is (%zu) and notrailer count is %d\n",
			   count,
			   notrailer_count);
		return -EPROTO;
	}
	buffer = dev_req_alloc();
	if (!buffer)
		return -ENOMEM;
	ptr = buffer;

	for (i = 0; i < notrailer_count; i++) {
		if (iov[i].iov_len > num_remaining) {
			gossip_err("writev error: Freeing buffer and returning\n");
			dev_req_release(buffer);
			return -EMSGSIZE;
		}
		ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
		if (ret) {
			gossip_err("Failed to copy data from user space\n");
			dev_req_release(buffer);
			return -EIO;
		}
		num_remaining -= iov[i].iov_len;
		ptr += iov[i].iov_len;
		payload_size += iov[i].iov_len;
	}
	total_returned_size = payload_size;

	/*
	 * these elements are currently 8 byte aligned (8 bytes for (version +
	 * magic), 8 bytes for tag). If you add another element, either
	 * make it 8 bytes big, or use get_unaligned when assigning.
	 */
	ptr = buffer;
	proto_ver = *((__s32 *) ptr);
	ptr += sizeof(__s32);

	magic = *((__s32 *) ptr);
	ptr += sizeof(__s32);

	tag = *((__u64 *) ptr);
	ptr += sizeof(__u64);

	if (magic != PVFS2_DEVREQ_MAGIC) {
		gossip_err("Error: Device magic number does not match.\n");
		dev_req_release(buffer);
		return -EPROTO;
	}

	/*
	 * proto_ver = 20902 for 2.9.2
	 */

	op = pvfs2_devreq_remove_op(tag);
	if (op) {
		/* Increase ref count! */
		get_op(op);
		/* cut off magic and tag from payload size */
		payload_size -= (2 * sizeof(__s32) + sizeof(__u64));
		if (payload_size <= sizeof(struct pvfs2_downcall_s))
			/* copy the passed in downcall into the op */
			memcpy(&op->downcall,
			       ptr,
			       sizeof(struct pvfs2_downcall_s));
		else
			gossip_debug(GOSSIP_DEV_DEBUG,
				     "writev: Ignoring %d bytes\n",
				     payload_size);

		/*
		 * Do not allocate needlessly if client-core forgets
		 * to reset trailer size on op errors.
		 */
		if (op->downcall.status == 0 && op->downcall.trailer_size > 0) {
			gossip_debug(GOSSIP_DEV_DEBUG,
				     "writev: trailer size %ld\n",
				     (unsigned long)op->downcall.trailer_size);
			if (count != (notrailer_count + 1)) {
				gossip_err("Error: trailer size (%ld) is non-zero, no trailer elements though? (%zu)\n",
					   (unsigned long)op->downcall.trailer_size,
					   count);
				dev_req_release(buffer);
				put_op(op);
				return -EPROTO;
			}
			if (iov[notrailer_count].iov_len >
			    op->downcall.trailer_size) {
				gossip_err("writev error: trailer size (%ld) < iov_len (%ld)\n",
					   (unsigned long)op->downcall.trailer_size,
					   (unsigned long)iov[notrailer_count].iov_len);
				dev_req_release(buffer);
				put_op(op);
				return -EMSGSIZE;
			}
			/*
			 * Allocate a buffer large enough to hold the
			 * trailer bytes.
			 */
			op->downcall.trailer_buf =
				vmalloc(op->downcall.trailer_size);
			if (op->downcall.trailer_buf != NULL) {
				gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n",
					     op->downcall.trailer_buf);
				ret = copy_from_user(op->downcall.trailer_buf,
						     iov[notrailer_count].iov_base,
						     iov[notrailer_count].iov_len);
				if (ret) {
					gossip_err("Failed to copy trailer data from user space\n");
					dev_req_release(buffer);
					gossip_debug(GOSSIP_DEV_DEBUG,
						     "vfree: %p\n",
						     op->downcall.trailer_buf);
					vfree(op->downcall.trailer_buf);
					op->downcall.trailer_buf = NULL;
					put_op(op);
					return -EIO;
				}
			} else {
				/* Change downcall status */
				op->downcall.status = -ENOMEM;
				gossip_err("writev: could not vmalloc for trailer!\n");
			}
		}

		/*
		 * If this operation is an I/O operation and it was
		 * initiated on behalf of a *synchronous* VFS I/O
		 * operation, only then do we need to wait for all data
		 * to be copied before we can return, to avoid buffer
		 * corruption and races that can pull the buffers out
		 * from under us.
		 *
		 * Essentially we're synchronizing with other parts of
		 * the vfs implicitly by not allowing the user space
		 * application reading/writing this device to return
		 * until the buffers are done being used.
		 */
		if ((op->upcall.type == PVFS2_VFS_OP_FILE_IO &&
		     op->upcall.req.io.async_vfs_io == PVFS_VFS_SYNC_IO) ||
		    op->upcall.type == PVFS2_VFS_OP_FILE_IOX) {
			int timed_out = 0;
			DECLARE_WAITQUEUE(wait_entry, current);

			/*
			 * tell the vfs op waiting on a waitqueue
			 * that this op is done
			 */
			spin_lock(&op->lock);
			set_op_state_serviced(op);
			spin_unlock(&op->lock);

			add_wait_queue_exclusive(&op->io_completion_waitq,
						 &wait_entry);
			wake_up_interruptible(&op->waitq);

			while (1) {
				set_current_state(TASK_INTERRUPTIBLE);

				spin_lock(&op->lock);
				if (op->io_completed) {
					spin_unlock(&op->lock);
					break;
				}
				spin_unlock(&op->lock);

				if (!signal_pending(current)) {
					int timeout =
					    MSECS_TO_JIFFIES(1000 *
							     op_timeout_secs);
					if (!schedule_timeout(timeout)) {
						gossip_debug(GOSSIP_DEV_DEBUG, "*** I/O wait time is up\n");
						timed_out = 1;
						break;
					}
					continue;
				}

				gossip_debug(GOSSIP_DEV_DEBUG, "*** signal on I/O wait -- aborting\n");
				break;
			}

			set_current_state(TASK_RUNNING);
			remove_wait_queue(&op->io_completion_waitq,
					  &wait_entry);

			/*
			 * NOTE: for I/O operations we handle releasing the op
			 * object except in the case of timeout. the reason we
			 * can't free the op in timeout cases is that the op
			 * service logic in the vfs retries operations using
			 * the same op ptr, thus it can't be freed.
			 */
			if (!timed_out)
				op_release(op);
		} else {
			/*
			 * tell the vfs op waiting on a waitqueue that
			 * this op is done
			 */
			spin_lock(&op->lock);
			set_op_state_serviced(op);
			spin_unlock(&op->lock);
			/*
			 * for every other operation (i.e. non-I/O), we need to
			 * wake up the callers for downcall completion
			 * notification
			 */
			wake_up_interruptible(&op->waitq);
		}
	} else {
		/* ignore downcalls that we're not interested in */
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "WARNING: No one's waiting for tag %llu\n",
			     llu(tag));
	}
	dev_req_release(buffer);

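	/* note: trailer bytes are not counted in the size returned here */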
	return total_returned_size;
}

static ssize_t pvfs2_devreq_write_iter(struct kiocb *iocb,
				       struct iov_iter *iter)
{
	return pvfs2_devreq_writev(iocb->ki_filp,
				   iter->iov,
				   iter->nr_segs,
				   &iocb->ki_pos);
}

/*
 * Marks every mounted file system as pending a remount.
 * Returns 1 if no file systems were mounted, 0 otherwise.
 */
static int mark_all_pending_mounts(void)
{
	int unmounted = 1;
	struct pvfs2_sb_info_s *pvfs2_sb = NULL;

	spin_lock(&pvfs2_superblocks_lock);
	list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) {
		/* All of these file systems require a remount */
		pvfs2_sb->mount_pending = 1;
		unmounted = 0;
	}
	spin_unlock(&pvfs2_superblocks_lock);
	return unmounted;
}

/*
 * Determine if a given file system needs to be remounted or not.
 * Returns -1 if the fsid is not known,
 *          0 if already mounted,
 *          1 if needs remount
 */
int fs_mount_pending(__s32 fsid)
{
	int mount_pending = -1;
	struct pvfs2_sb_info_s *pvfs2_sb = NULL;

	spin_lock(&pvfs2_superblocks_lock);
	list_for_each_entry(pvfs2_sb, &pvfs2_superblocks, list) {
		if (pvfs2_sb->fs_id == fsid) {
			mount_pending = pvfs2_sb->mount_pending;
			break;
		}
	}
	spin_unlock(&pvfs2_superblocks_lock);
	return mount_pending;
}

/*
 * NOTE: gets called when the last reference to this device is dropped.
 * Using the open_access_count variable, we enforce a reference count
 * on this file so that it can be opened by only one process at a time.
 * The devreq_mutex is used to make sure all i/o has completed
 * before we call pvfs_bufmap_finalize, and to handle similar such
 * tricky situations.
 */
static int pvfs2_devreq_release(struct inode *inode, struct file *file)
{
	int unmounted = 0;

	gossip_debug(GOSSIP_DEV_DEBUG,
		     "%s:pvfs2-client-core: exiting, closing device\n",
		     __func__);

	mutex_lock(&devreq_mutex);
	pvfs_bufmap_finalize();

	open_access_count--;

	unmounted = mark_all_pending_mounts();
	gossip_debug(GOSSIP_DEV_DEBUG, "PVFS2 Device Close: Filesystem(s) %s\n",
		     (unmounted ? "UNMOUNTED" : "MOUNTED"));
	mutex_unlock(&devreq_mutex);

	/*
	 * Walk through the list of ops in the request list, mark them
	 * as purged and wake them up.
	 */
	purge_waiting_ops();
	/*
	 * Walk through the hash table of in progress operations; mark
	 * them as purged and wake them up.
	 */
	purge_inprogress_ops();
	gossip_debug(GOSSIP_DEV_DEBUG,
		     "pvfs2-client-core: device close complete\n");
	return 0;
}

int is_daemon_in_service(void)
{
	int in_service;

	/*
	 * Check whether the client-core is alive based on the access
	 * count we maintain on the device.
	 */
	mutex_lock(&devreq_mutex);
	in_service = open_access_count == 1 ? 0 : -EIO;
	mutex_unlock(&devreq_mutex);
	return in_service;
}

static inline long check_ioctl_command(unsigned int command)
{
	/* Check for valid ioctl codes */
	if (_IOC_TYPE(command) != PVFS_DEV_MAGIC) {
		gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
			   command,
			   _IOC_TYPE(command),
			   PVFS_DEV_MAGIC);
		return -EINVAL;
	}
	/* and valid ioctl commands */
	if (_IOC_NR(command) >= PVFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
		gossip_err("Invalid ioctl command number [%d >= %d]\n",
			   _IOC_NR(command), PVFS_DEV_MAXNR);
		return -ENOIOCTLCMD;
	}
	return 0;
}

static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
{
	static __s32 magic = PVFS2_DEVREQ_MAGIC;
	static __s32 max_up_size = MAX_ALIGNED_DEV_REQ_UPSIZE;
	static __s32 max_down_size = MAX_ALIGNED_DEV_REQ_DOWNSIZE;
	struct PVFS_dev_map_desc user_desc;
	int ret = 0;
	struct dev_mask_info_s mask_info = { 0 };
	struct dev_mask2_info_s mask2_info = { 0, 0 };
	int upstream_kmod = 1;
	struct list_head *tmp = NULL;
	struct pvfs2_sb_info_s *pvfs2_sb = NULL;

	/* mtmoore: add locking here */

	switch (command) {
	case PVFS_DEV_GET_MAGIC:
		return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
			-EIO :
			0);
	case PVFS_DEV_GET_MAX_UPSIZE:
		return ((put_user(max_up_size,
				  (__s32 __user *) arg) == -EFAULT) ?
			-EIO :
			0);
	case PVFS_DEV_GET_MAX_DOWNSIZE:
		return ((put_user(max_down_size,
				  (__s32 __user *) arg) == -EFAULT) ?
			-EIO :
			0);
	case PVFS_DEV_MAP:
		ret = copy_from_user(&user_desc,
				     (struct PVFS_dev_map_desc __user *)arg,
				     sizeof(struct PVFS_dev_map_desc));
		return ret ? -EIO : pvfs_bufmap_initialize(&user_desc);
	case PVFS_DEV_REMOUNT_ALL:
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "pvfs2_devreq_ioctl: got PVFS_DEV_REMOUNT_ALL\n");

		/*
		 * remount all mounted pvfs2 volumes to regain the lost
		 * dynamic mount tables (if any) -- NOTE: this is done
		 * without keeping the superblock list locked due to the
		 * upcall/downcall waiting. also, the request mutex is
		 * used to ensure that no operations will be serviced until
		 * all of the remounts are serviced (to avoid ops between
		 * mounts failing)
		 */
		ret = mutex_lock_interruptible(&request_mutex);
		if (ret < 0)
			return ret;
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "pvfs2_devreq_ioctl: priority remount in progress\n");
		list_for_each(tmp, &pvfs2_superblocks) {
			pvfs2_sb =
				list_entry(tmp, struct pvfs2_sb_info_s, list);
			if (pvfs2_sb && (pvfs2_sb->sb)) {
				gossip_debug(GOSSIP_DEV_DEBUG,
					     "Remounting SB %p\n",
					     pvfs2_sb);

				ret = pvfs2_remount(pvfs2_sb->sb);
				if (ret) {
					gossip_debug(GOSSIP_DEV_DEBUG,
						     "SB %p remount failed\n",
						     pvfs2_sb);
					break;
				}
			}
		}
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "pvfs2_devreq_ioctl: priority remount complete\n");
		mutex_unlock(&request_mutex);
		return ret;

	case PVFS_DEV_UPSTREAM:
		ret = copy_to_user((void __user *)arg,
				   &upstream_kmod,
				   sizeof(upstream_kmod));

		if (ret != 0)
			return -EIO;
		else
			return ret;

	case PVFS_DEV_CLIENT_MASK:
		ret = copy_from_user(&mask2_info,
				     (void __user *)arg,
				     sizeof(struct dev_mask2_info_s));

		if (ret != 0)
			return -EIO;

		client_debug_mask.mask1 = mask2_info.mask1_value;
		client_debug_mask.mask2 = mask2_info.mask2_value;

		pr_info("%s: client debug mask has been received :%llx: :%llx:\n",
			__func__,
			(unsigned long long)client_debug_mask.mask1,
			(unsigned long long)client_debug_mask.mask2);

		return ret;

	case PVFS_DEV_CLIENT_STRING:
		ret = copy_from_user(&client_debug_array_string,
				     (void __user *)arg,
				     PVFS2_MAX_DEBUG_STRING_LEN);
		if (ret != 0) {
			pr_info("%s: PVFS_DEV_CLIENT_STRING: copy_from_user failed\n",
				__func__);
			return -EIO;
		}

		pr_info("%s: client debug array string has been received.\n",
			__func__);

		if (!help_string_initialized) {

			/* Free the "we don't know yet" default string... */
			kfree(debug_help_string);

			/* build a proper debug help string */
			if (orangefs_prepare_debugfs_help_string(0)) {
				gossip_err("%s: prepare_debugfs_help_string failed\n",
					   __func__);
				return -EIO;
			}

			/* Replace the boilerplate boot-time debug-help file. */
			debugfs_remove(help_file_dentry);

			help_file_dentry =
				debugfs_create_file(
					ORANGEFS_KMOD_DEBUG_HELP_FILE,
					0444,
					debug_dir,
					debug_help_string,
					&debug_help_fops);

			if (!help_file_dentry) {
				gossip_err("%s: debugfs_create_file failed for :%s:!\n",
					   __func__,
					   ORANGEFS_KMOD_DEBUG_HELP_FILE);
				return -EIO;
			}
		}

		debug_mask_to_string(&client_debug_mask, 1);

		debugfs_remove(client_debug_dentry);

		pvfs2_client_debug_init();

		help_string_initialized++;

		return ret;

	case PVFS_DEV_DEBUG:
		ret = copy_from_user(&mask_info,
				     (void __user *)arg,
				     sizeof(mask_info));

		if (ret != 0)
			return -EIO;

		if (mask_info.mask_type == KERNEL_MASK) {
			if ((mask_info.mask_value == 0)
			    && (kernel_mask_set_mod_init)) {
				/*
				 * the kernel debug mask was set when the
				 * kernel module was loaded; don't override
				 * it if the client-core was started without
				 * a value for PVFS2_KMODMASK.
				 */
				return 0;
			}
			debug_mask_to_string(&mask_info.mask_value,
					     mask_info.mask_type);
			gossip_debug_mask = mask_info.mask_value;
			pr_info("PVFS: kernel debug mask has been modified to :%s: :%llx:\n",
				kernel_debug_string,
				(unsigned long long)gossip_debug_mask);
		} else if (mask_info.mask_type == CLIENT_MASK) {
			debug_mask_to_string(&mask_info.mask_value,
					     mask_info.mask_type);
			pr_info("PVFS: client debug mask has been modified to :%s: :%llx:\n",
				client_debug_string,
				llu(mask_info.mask_value));
		} else {
			gossip_lerr("Invalid mask type....\n");
			return -EINVAL;
		}

		return ret;

	default:
		return -ENOIOCTLCMD;
	}
	return -ENOIOCTLCMD;
}

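/*
 * A minimal user-space sketch (hypothetical; error handling omitted) of
 * how a client-core might interrogate the device before mapping its
 * shared buffers:
 *
 *	int fd = open("/dev/pvfs2-req", O_RDWR | O_NONBLOCK);
 *	__s32 magic, up_size, down_size;
 *
 *	ioctl(fd, PVFS_DEV_GET_MAGIC, &magic);
 *	ioctl(fd, PVFS_DEV_GET_MAX_UPSIZE, &up_size);
 *	ioctl(fd, PVFS_DEV_GET_MAX_DOWNSIZE, &down_size);
 */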
static long pvfs2_devreq_ioctl(struct file *file,
			       unsigned int command, unsigned long arg)
{
	long ret;

	/* Check for properly constructed commands */
	ret = check_ioctl_command(command);
	if (ret < 0)
		return (int)ret;

	return (int)dispatch_ioctl_command(command, arg);
}

#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */

/* Compat structure for the PVFS_DEV_MAP ioctl */
struct PVFS_dev_map_desc32 {
	compat_uptr_t ptr;
	__s32 total_size;
	__s32 size;
	__s32 count;
};
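
/*
 * Only the pointer field differs from the native PVFS_dev_map_desc:
 * compat_uptr_t is 32 bits wide and must be widened with compat_ptr(),
 * as translate_dev_map26() below shows.
 */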

static unsigned long translate_dev_map26(unsigned long args, long *error)
{
	struct PVFS_dev_map_desc32 __user *p32 = (void __user *)args;
	/*
	 * Depending on the architecture, allocate some space on the
	 * user-call-stack based on our expected layout.
	 */
	struct PVFS_dev_map_desc __user *p =
	    compat_alloc_user_space(sizeof(*p));
	u32 addr;

	*error = 0;
	/* get the ptr from the 32 bit user-space */
	if (get_user(addr, &p32->ptr))
		goto err;
	/* try to put that into a 64-bit layout */
	if (put_user(compat_ptr(addr), &p->ptr))
		goto err;
	/* copy the remaining fields */
	if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
		goto err;
	if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
		goto err;
	if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
		goto err;
	return (unsigned long)p;
err:
	*error = -EFAULT;
	return 0;
}

/*
 * 32-bit user-space apps' ioctl handlers when the kernel module
 * is compiled as a 64-bit one
 */
static long pvfs2_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
				      unsigned long args)
{
	long ret;
	unsigned long arg = args;

	/* Check for properly constructed commands */
	ret = check_ioctl_command(cmd);
	if (ret < 0)
		return ret;
	if (cmd == PVFS_DEV_MAP) {
		/*
		 * convert the arguments to what we expect internally
		 * in kernel space
		 */
		arg = translate_dev_map26(args, &ret);
		if (ret < 0) {
			gossip_err("Could not translate dev map\n");
			return ret;
		}
	}
	/* no other ioctl requires translation */
	return dispatch_ioctl_command(cmd, arg);
}

#endif /* CONFIG_COMPAT is in .config */

/*
 * The following two ioctl32 functions had been refactored into the above
 * CONFIG_COMPAT ifdef, but that was an oversimplification that was
 * not noticed until we tried to compile on PowerPC...
 */
#if (defined(CONFIG_COMPAT) && !defined(HAVE_REGISTER_IOCTL32_CONVERSION)) || !defined(CONFIG_COMPAT)
static int pvfs2_ioctl32_init(void)
{
	return 0;
}

static void pvfs2_ioctl32_cleanup(void)
{
	return;
}
#endif

/* the assigned character device major number */
static int pvfs2_dev_major;

/*
 * Initialize pvfs2 device specific state:
 * Must be called at module load time only
 */
int pvfs2_dev_init(void)
{
	int ret;

	/* register the ioctl32 sub-system */
	ret = pvfs2_ioctl32_init();
	if (ret < 0)
		return ret;

	/* register pvfs2-req device */
	pvfs2_dev_major = register_chrdev(0,
					  PVFS2_REQDEVICE_NAME,
					  &pvfs2_devreq_file_operations);
	if (pvfs2_dev_major < 0) {
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "Failed to register /dev/%s (error %d)\n",
			     PVFS2_REQDEVICE_NAME, pvfs2_dev_major);
		pvfs2_ioctl32_cleanup();
		return pvfs2_dev_major;
	}

	gossip_debug(GOSSIP_DEV_DEBUG,
		     "*** /dev/%s character device registered ***\n",
		     PVFS2_REQDEVICE_NAME);
	gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
		     PVFS2_REQDEVICE_NAME, pvfs2_dev_major);
	return 0;
}

void pvfs2_dev_cleanup(void)
{
	unregister_chrdev(pvfs2_dev_major, PVFS2_REQDEVICE_NAME);
	gossip_debug(GOSSIP_DEV_DEBUG,
		     "*** /dev/%s character device unregistered ***\n",
		     PVFS2_REQDEVICE_NAME);
	/* unregister the ioctl32 sub-system */
	pvfs2_ioctl32_cleanup();
}

static unsigned int pvfs2_devreq_poll(struct file *file,
				      struct poll_table_struct *poll_table)
{
	int poll_revent_mask = 0;

	if (open_access_count == 1) {
		poll_wait(file, &pvfs2_request_list_waitq, poll_table);

		spin_lock(&pvfs2_request_list_lock);
		if (!list_empty(&pvfs2_request_list))
			poll_revent_mask |= POLLIN;
		spin_unlock(&pvfs2_request_list_lock);
	}
	return poll_revent_mask;
}
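
/*
 * A hedged sketch (helper name hypothetical) of the client-core event
 * loop these handlers imply: poll() until POLLIN is set, read() one
 * upcall record, service it, then writev() the matching downcall with
 * the same tag.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *		n = read(fd, buf, sizeof(buf));	// one upcall record
 *		service_upcall(buf, n, iov);	// hypothetical helper
 *		writev(fd, iov, 4);		// downcall, same tag
 *	}
 */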

const struct file_operations pvfs2_devreq_file_operations = {
	.owner = THIS_MODULE,
	.read = pvfs2_devreq_read,
	.write_iter = pvfs2_devreq_write_iter,
	.open = pvfs2_devreq_open,
	.release = pvfs2_devreq_release,
	.unlocked_ioctl = pvfs2_devreq_ioctl,
#ifdef CONFIG_COMPAT	/* CONFIG_COMPAT is in .config */
	.compat_ioctl = pvfs2_devreq_compat_ioctl,
#endif
	.poll = pvfs2_devreq_poll
};