[PATCH] splice: add support for sys_tee()
author Jens Axboe <axboe@suse.de>
Tue, 11 Apr 2006 13:51:17 +0000 (15:51 +0200)
committer Jens Axboe <axboe@suse.de>
Tue, 11 Apr 2006 13:51:17 +0000 (15:51 +0200)
Basically an in-kernel implementation of tee, which uses splice and the
pipe buffers as an intelligent way to pass data around by reference.

Where the user space tee consumes the input and produces a stdout and
file output, this syscall merely duplicates the data inside a pipe to
another pipe. No data is copied, the output just grabs a reference to the
input pipe data.

Signed-off-by: Jens Axboe <axboe@suse.de>
arch/i386/kernel/syscall_table.S
arch/ia64/kernel/entry.S
arch/powerpc/kernel/systbl.S
fs/pipe.c
fs/splice.c
include/asm-i386/unistd.h
include/asm-ia64/unistd.h
include/asm-powerpc/unistd.h
include/asm-x86_64/unistd.h
include/linux/pipe_fs_i.h
include/linux/syscalls.h

index 4f58b9c0efe3cbb5f57a2bfda39371153b3eff2f..f48bef15b4f0dd5de72d4bbcc39f8c4f881947b6 100644 (file)
@@ -314,3 +314,4 @@ ENTRY(sys_call_table)
        .long sys_get_robust_list
        .long sys_splice
        .long sys_sync_file_range
+       .long sys_tee                   /* 315 */
index 6e16f6b35bd33c3739dbcdd1dba0dae7744807a2..e3079881121667346e96d8141a838fded794bf15 100644 (file)
@@ -1609,5 +1609,6 @@ sys_call_table:
        data8 sys_set_robust_list
        data8 sys_get_robust_list
        data8 sys_sync_file_range               // 1300
+       data8 sys_tee
 
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
index 1424eab450ee915322478f21ab00cefb5fd8a04b..a14c964038403fa6d6271967df95fa5727a41b1f 100644 (file)
@@ -323,3 +323,4 @@ COMPAT_SYS(pselect6)
 COMPAT_SYS(ppoll)
 SYSCALL(unshare)
 SYSCALL(splice)
+SYSCALL(tee)
index e984beb93a0ea88c436037f6e17c0edfbad37d54..7fefb10db8d9d6ad2a187d3c352cd0557beb30f5 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -131,12 +131,19 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
        return 0;
 }
 
+/*
+ * ->get hook for anon_pipe_buf_ops: take one more reference on the page
+ * behind this pipe buffer. 'info' is not used.
+ */
+static void anon_pipe_buf_get(struct pipe_inode_info *info,
+                             struct pipe_buffer *buf)
+{
+       struct page *page = buf->page;
+
+       page_cache_get(page);
+}
+
 static struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
        .map = anon_pipe_buf_map,
        .unmap = anon_pipe_buf_unmap,
        .release = anon_pipe_buf_release,
        .steal = anon_pipe_buf_steal,
+       .get = anon_pipe_buf_get,       /* takes an extra page reference */
 };
 
 static ssize_t
index 5d3eda64703b10e703e225b1ecb9157fd1607866..8d57e89924a68990efa6a72d279dd35efa0fc7a7 100644 (file)
@@ -125,12 +125,19 @@ static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
        kunmap(buf->page);
 }
 
+/*
+ * ->get hook for page_cache_pipe_buf_ops: take one more reference on the
+ * page behind this pipe buffer. 'info' is not used.
+ */
+static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
+                                   struct pipe_buffer *buf)
+{
+       struct page *page = buf->page;
+
+       page_cache_get(page);
+}
+
 static struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
        .map = page_cache_pipe_buf_map,
        .unmap = page_cache_pipe_buf_unmap,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
+       .get = page_cache_pipe_buf_get, /* takes an extra page reference */
 };
 
 /*
@@ -963,3 +970,182 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
 
        return error;
 }
+
+/*
+ * Link contents of ipipe to opipe.
+ *
+ * Each buffer queued in ipipe gets an extra reference through ->get and
+ * its descriptor is then duplicated into the next free slot of opipe,
+ * until 'len' bytes have been linked. The last buffer is truncated to the
+ * remaining length. ipipe's own buffers are left in place.
+ *
+ * 'flags' are SPLICE_F_* flags; only SPLICE_F_NONBLOCK is looked at.
+ *
+ * Returns the number of bytes linked. If nothing was linked the result is
+ * -EPIPE (opipe has no readers), -EAGAIN (would have to wait and
+ * SPLICE_F_NONBLOCK is set), -ERESTARTSYS (a signal is pending) or 0
+ * (ipipe is empty and has no writers). SIGPIPE is sent to the caller
+ * whenever opipe is found without readers.
+ */
+static int link_pipe(struct pipe_inode_info *ipipe,
+                    struct pipe_inode_info *opipe,
+                    size_t len, unsigned int flags)
+{
+       struct pipe_buffer *ibuf, *obuf;
+       int ret = 0, do_wakeup = 0;
+       /* number of ipipe buffers linked so far == index of the next one */
+       unsigned int i = 0;
+
+       /*
+        * Potential ABBA deadlock, work around it by ordering lock
+        * grabbing by inode address. Otherwise two different processes
+        * could deadlock (one doing tee from A -> B, the other from B -> A).
+        */
+       if (ipipe->inode < opipe->inode) {
+               mutex_lock(&ipipe->inode->i_mutex);
+               mutex_lock(&opipe->inode->i_mutex);
+       } else {
+               mutex_lock(&opipe->inode->i_mutex);
+               mutex_lock(&ipipe->inode->i_mutex);
+       }
+
+       /*
+        * 'i' is advanced only after a buffer has really been linked.
+        * Bumping it on every loop pass would skip input buffers each
+        * time we merely slept waiting for room or for data, and would
+        * let the index run past nrbufs onto stale slots.
+        */
+       for (;;) {
+               if (!opipe->readers) {
+                       send_sig(SIGPIPE, current, 0);
+                       if (!ret)
+                               ret = -EPIPE;
+                       break;
+               }
+               if (i < ipipe->nrbufs) {
+                       ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
+
+                       /*
+                        * If we have room, fill this buffer
+                        */
+                       if (opipe->nrbufs < PIPE_BUFFERS) {
+                               int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
+
+                               /*
+                                * Get a reference to this pipe buffer,
+                                * so we can copy the contents over.
+                                */
+                               ibuf->ops->get(ipipe, ibuf);
+
+                               obuf = opipe->bufs + nbuf;
+                               *obuf = *ibuf;
+
+                               /* never hand out more than the caller asked for */
+                               if (obuf->len > len)
+                                       obuf->len = len;
+
+                               opipe->nrbufs++;
+                               do_wakeup = 1;
+                               ret += obuf->len;
+                               len -= obuf->len;
+                               i++;
+
+                               if (!len)
+                                       break;
+                               if (opipe->nrbufs < PIPE_BUFFERS)
+                                       continue;
+                       }
+
+                       /*
+                        * We have input available, but no output room.
+                        * If we already copied data, return that.
+                        */
+                       if (flags & SPLICE_F_NONBLOCK) {
+                               if (!ret)
+                                       ret = -EAGAIN;
+                               break;
+                       }
+                       if (signal_pending(current)) {
+                               if (!ret)
+                                       ret = -ERESTARTSYS;
+                               break;
+                       }
+                       /* let readers of opipe see what we queued before sleeping */
+                       if (do_wakeup) {
+                               smp_mb();
+                               if (waitqueue_active(&opipe->wait))
+                                       wake_up_interruptible(&opipe->wait);
+                               kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
+                               do_wakeup = 0;
+                       }
+
+                       opipe->waiting_writers++;
+                       pipe_wait(opipe);
+                       opipe->waiting_writers--;
+                       continue;
+               }
+
+               /*
+                * No input buffers, do the usual checks for available
+                * writers and blocking and wait if necessary
+                */
+               if (!ipipe->writers)
+                       break;
+               if (!ipipe->waiting_writers) {
+                       if (ret)
+                               break;
+               }
+               if (flags & SPLICE_F_NONBLOCK) {
+                       if (!ret)
+                               ret = -EAGAIN;
+                       break;
+               }
+               if (signal_pending(current)) {
+                       if (!ret)
+                               ret = -ERESTARTSYS;
+                       break;
+               }
+
+               if (waitqueue_active(&ipipe->wait))
+                       wake_up_interruptible_sync(&ipipe->wait);
+               kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
+
+               /*
+                * NOTE(review): pipe_wait() presumably drops the pipe's
+                * mutex while sleeping. If a reader drains ipipe in the
+                * meantime, curbuf moves and 'i' no longer refers to the
+                * same buffer - confirm and rework if so.
+                */
+               pipe_wait(ipipe);
+       }
+
+       mutex_unlock(&ipipe->inode->i_mutex);
+       mutex_unlock(&opipe->inode->i_mutex);
+
+       if (do_wakeup) {
+               smp_mb();
+               if (waitqueue_active(&opipe->wait))
+                       wake_up_interruptible(&opipe->wait);
+               kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
+       }
+
+       return ret;
+}
+
+/*
+ * This is a tee(1) implementation that works on pipes. It doesn't copy
+ * any data, it simply references the 'in' pages on the 'out' pipe.
+ * The 'flags' used are the SPLICE_F_* variants, currently the only
+ * applicable one is SPLICE_F_NONBLOCK.
+ *
+ * Returns the result of link_pipe(), or -EINVAL if either inode has no
+ * i_pipe or if 'in' and 'out' refer to the same pipe.
+ */
+static long do_tee(struct file *in, struct file *out, size_t len,
+                  unsigned int flags)
+{
+       struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
+       struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+
+       /*
+        * Duplicate the contents of ipipe to opipe without copying or
+        * consuming the data. Teeing a pipe onto itself is refused:
+        * link_pipe() takes the i_mutex of both pipes and would try to
+        * lock the same mutex twice.
+        */
+       if (ipipe && opipe && ipipe != opipe)
+               return link_pipe(ipipe, opipe, len, flags);
+
+       return -EINVAL;
+}
+
+/*
+ * tee() system call entry: look up both descriptors, require 'fdin' to be
+ * open for reading and 'fdout' for writing, then pass the files to
+ * do_tee(). A zero 'len' returns 0 without touching the descriptors;
+ * a descriptor that cannot be looked up or has the wrong access mode
+ * gives -EBADF.
+ */
+asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
+{
+       struct file *src, *dst;
+       int error = -EBADF;
+       int put_src, put_dst;
+
+       if (unlikely(!len))
+               return 0;
+
+       src = fget_light(fdin, &put_src);
+       if (!src)
+               return error;
+
+       if (!(src->f_mode & FMODE_READ))
+               goto release_src;
+
+       dst = fget_light(fdout, &put_dst);
+       if (!dst)
+               goto release_src;
+
+       if (dst->f_mode & FMODE_WRITE)
+               error = do_tee(src, dst, len, flags);
+       fput_light(dst, put_dst);
+
+release_src:
+       fput_light(src, put_src);
+       return error;
+}
index 6a8dd83c350f9cbc3e2c5fdf8e6a489a4326e0da..d81d6cfc1bb4a81f40b213cab6a14eed67f371c9 100644 (file)
 #define __NR_get_robust_list   312
 #define __NR_splice            313
 #define __NR_sync_file_range   314
+#define __NR_tee               315
 
-#define NR_syscalls 315
+#define NR_syscalls 316
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
index 1c749acca021b7581f91ddecfc83c9b5a16efa0b..a40ebec6aeebe234a91dc84767e266177a9c058d 100644 (file)
 #define __NR_set_robust_list           1298
 #define __NR_get_robust_list           1299
 #define __NR_sync_file_range           1300
+#define __NR_tee                       1301
 
 #ifdef __KERNEL__
 
 #include <linux/config.h>
 
-#define NR_syscalls                    277 /* length of syscall table */
+#define NR_syscalls                    278 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
index 536ba0873052c92be7e314446f69b1ae8b5b2e0c..c612f1a6277240edd9ae5526fe1678415fd2f48b 100644 (file)
 #define __NR_ppoll             281
 #define __NR_unshare           282
 #define __NR_splice            283
+#define __NR_tee               284
 
-#define __NR_syscalls          284
+#define __NR_syscalls          285
 
 #ifdef __KERNEL__
 #define __NR__exit __NR_exit
index f21ff2c1e960bfcafeb735b058857c14f0529a2e..d86494e23b638460557133d1f388f5c18030b17b 100644 (file)
@@ -611,8 +611,10 @@ __SYSCALL(__NR_set_robust_list, sys_set_robust_list)
 __SYSCALL(__NR_get_robust_list, sys_get_robust_list)
 #define __NR_splice            275
 __SYSCALL(__NR_splice, sys_splice)
+#define __NR_tee               276
+__SYSCALL(__NR_tee, sys_tee)
 
-#define __NR_syscall_max __NR_splice
+#define __NR_syscall_max __NR_tee
 
 #ifndef __NO_STUBS
 
index 123a7c24bc72b18c6192223a703b1cd8711c204c..ef7f33c0be19190590cc205f5828ba1fc6ea9170 100644 (file)
@@ -21,6 +21,7 @@ struct pipe_buf_operations {
        void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
        void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
        int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
+       void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
 struct pipe_inode_info {
index f001bad28d9acc06941a1044e9a86f8d10bce910..d3ebc0e68b2b4cbc20d847cc86598a6e12bedaa0 100644 (file)
@@ -574,6 +574,8 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
                           int fd_out, loff_t __user *off_out,
                           size_t len, unsigned int flags);
 
+asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags);
+
 asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
                                        unsigned int flags);