orangefs: support llseek on directories
[linux-2.6-block.git] / fs / orangefs / dir.c
CommitLineData
5db11c21 1/*
382f4581 2 * Copyright 2017 Omnibond Systems, L.L.C.
5db11c21
MM
3 */
4
5#include "protocol.h"
575e9461
MM
6#include "orangefs-kernel.h"
7#include "orangefs-bufmap.h"
5db11c21 8
/*
 * Upper bound, in bytes, on the directory data buffered per open
 * directory.  At most 512 entries are expected, and each entry is
 * encoded as:
 *   4 bytes:  string size (n)
 *   n bytes:  string
 *   1 byte:   trailing zero
 *   padding to 8 bytes
 *   16 bytes: khandle
 *   padding to 8 bytes
 *
 * The per-entry worst case below is 4 (size) + 257 (string and
 * trailing zero) + 3 (padding) + 16 (khandle) = 280 bytes.
 */
#define MAX_DIRECTORY (512 * (4 + 257 + 3 + 16))
1808f8cc 20
382f4581
MB
21struct orangefs_dir {
22 __u64 token;
23 void *directory;
72f66b83 24 size_t len;
382f4581
MB
25 int error;
26};
5db11c21 27
5db11c21 28/*
382f4581
MB
29 * The userspace component sends several directory entries of the
30 * following format. The first four bytes are the string length not
31 * including a trailing zero byte. This is followed by the string and a
32 * trailing zero padded to the next four byte boundry. This is followed
33 * by the sixteen byte khandle padded to the next eight byte boundry.
34 *
35 * The trailer_buf starts with a struct orangefs_readdir_response_s
36 * which must be skipped to get to the directory data.
5db11c21 37 */
5db11c21 38
382f4581
MB
39static int orangefs_dir_more(struct orangefs_inode_s *oi,
40 struct orangefs_dir *od, struct dentry *dentry)
41{
42 const size_t offset =
43 sizeof(struct orangefs_readdir_response_s);
44 struct orangefs_readdir_response_s *resp;
45 struct orangefs_kernel_op_s *op;
46 int bufi, r;
47
48 op = op_alloc(ORANGEFS_VFS_OP_READDIR);
49 if (!op) {
50 od->error = -ENOMEM;
5db11c21 51 return -ENOMEM;
382f4581 52 }
5db11c21 53
ee3b8d37 54 /*
382f4581
MB
55 * Despite the badly named field, readdir does not use shared
56 * memory. However, there are a limited number of readdir
57 * slots, which must be allocated here. This flag simply tells
58 * the op scheduler to return the op here for retry.
ee3b8d37 59 */
382f4581
MB
60 op->uses_shared_memory = 1;
61 op->upcall.req.readdir.refn = oi->refn;
62 op->upcall.req.readdir.token = od->token;
63 op->upcall.req.readdir.max_dirent_count =
7d221485 64 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
5db11c21 65
382f4581
MB
66again:
67 bufi = orangefs_readdir_index_get();
68 if (bufi < 0) {
69 op_release(op);
70 od->error = bufi;
71 return bufi;
5db11c21 72 }
5db11c21 73
382f4581 74 op->upcall.req.readdir.buf_index = bufi;
5db11c21 75
382f4581
MB
76 r = service_operation(op, "orangefs_readdir",
77 get_interruptible_flag(dentry->d_inode));
5db11c21 78
382f4581 79 orangefs_readdir_index_put(bufi);
ee3b8d37 80
382f4581
MB
81 if (op_state_purged(op)) {
82 if (r == -EAGAIN) {
83 vfree(op->downcall.trailer_buf);
84 goto again;
85 } else if (r == -EIO) {
86 vfree(op->downcall.trailer_buf);
87 op_release(op);
88 od->error = r;
89 return r;
90 }
5db11c21
MM
91 }
92
382f4581
MB
93 if (r < 0) {
94 vfree(op->downcall.trailer_buf);
95 op_release(op);
96 od->error = r;
97 return r;
98 } else if (op->downcall.status) {
99 vfree(op->downcall.trailer_buf);
100 op_release(op);
101 od->error = op->downcall.status;
102 return op->downcall.status;
103 }
104
105 resp = (struct orangefs_readdir_response_s *)
106 op->downcall.trailer_buf;
107 od->token = resp->token;
108
109 if (od->len + op->downcall.trailer_size - offset <=
110 MAX_DIRECTORY) {
111 memcpy(od->directory + od->len,
112 op->downcall.trailer_buf + offset,
113 op->downcall.trailer_size - offset);
114 od->len += op->downcall.trailer_size - offset;
115 } else {
116 /* This limit was chosen based on protocol limits. */
117 gossip_err("orangefs_dir_more: userspace sent too much data\n");
118 vfree(op->downcall.trailer_buf);
119 op_release(op);
120 od->error = -EIO;
121 return -EIO;
122 }
123
124 vfree(op->downcall.trailer_buf);
125 op_release(op);
126 return 0;
127}
9f5e2f7f 128
382f4581
MB
129static int orangefs_dir_fill(struct orangefs_inode_s *oi,
130 struct orangefs_dir *od, struct dentry *dentry,
131 struct dir_context *ctx)
132{
133 struct orangefs_khandle *khandle;
134 __u32 *len, padlen;
72f66b83 135 loff_t i;
382f4581 136 char *s;
72f66b83
MB
137 i = ctx->pos - 2;
138 while (i < od->len) {
139 if (od->len < i + sizeof *len)
382f4581 140 goto eio;
72f66b83 141 len = od->directory + i;
382f4581
MB
142 /*
143 * len is the size of the string itself. padlen is the
144 * total size of the encoded string.
145 */
146 padlen = (sizeof *len + *len + 1) +
147 (4 - (sizeof *len + *len + 1)%8)%8;
72f66b83 148 if (od->len < i + padlen + sizeof *khandle)
382f4581 149 goto eio;
72f66b83 150 s = od->directory + i + sizeof *len;
382f4581
MB
151 if (s[*len] != 0)
152 goto eio;
72f66b83 153 khandle = od->directory + i + padlen;
382f4581
MB
154
155 if (!dir_emit(ctx, s, *len,
156 orangefs_khandle_to_ino(khandle), DT_UNKNOWN))
157 return 0;
72f66b83
MB
158 i += padlen + sizeof *khandle;
159 i = i + (8 - i%8)%8;
160 ctx->pos = i + 2;
382f4581 161 }
72f66b83 162 BUG_ON(i > od->len);
382f4581
MB
163 return 0;
164eio:
72f66b83
MB
165 /*
166 * Here either data from userspace is corrupt or the application
167 * has sought to an invalid location.
168 */
382f4581
MB
169 od->error = -EIO;
170 return -EIO;
171}
5db11c21 172
382f4581
MB
173static int orangefs_dir_iterate(struct file *file,
174 struct dir_context *ctx)
175{
176 struct orangefs_inode_s *oi;
177 struct orangefs_dir *od;
178 struct dentry *dentry;
179 int r;
5db11c21 180
382f4581
MB
181 dentry = file->f_path.dentry;
182 oi = ORANGEFS_I(dentry->d_inode);
183 od = file->private_data;
5db11c21 184
382f4581
MB
185 if (od->error)
186 return od->error;
5db11c21 187
382f4581
MB
188 if (ctx->pos == 0) {
189 if (!dir_emit_dot(file, ctx))
190 return 0;
191 ctx->pos++;
5db11c21 192 }
382f4581
MB
193 if (ctx->pos == 1) {
194 if (!dir_emit_dotdot(file, ctx))
195 return 0;
5db11c21 196 ctx->pos++;
5db11c21
MM
197 }
198
382f4581
MB
199 r = 0;
200
72f66b83
MB
201 /*
202 * Must read more if the user has sought past what has been read
203 * so far. Stop a user who has sought past the end.
204 */
205 while (od->token != ORANGEFS_READDIR_END && ctx->pos - 2 >
206 od->len) {
207 r = orangefs_dir_more(oi, od, dentry);
208 if (r)
209 return r;
210 }
211 if (od->token == ORANGEFS_READDIR_END && ctx->pos - 2 >
212 od->len) {
213 return -EIO;
214 }
215
216 /* Then try to fill if there's any left in the buffer. */
217 if (ctx->pos - 2 < od->len) {
382f4581
MB
218 r = orangefs_dir_fill(oi, od, dentry, ctx);
219 if (r)
220 return r;
5db11c21
MM
221 }
222
72f66b83 223 /* Finally get some more and try to fill. */
382f4581
MB
224 if (od->token != ORANGEFS_READDIR_END) {
225 r = orangefs_dir_more(oi, od, dentry);
226 if (r)
227 return r;
228 r = orangefs_dir_fill(oi, od, dentry, ctx);
5db11c21
MM
229 }
230
382f4581 231 return r;
5db11c21
MM
232}
233
8bb8aefd 234static int orangefs_dir_open(struct inode *inode, struct file *file)
5db11c21 235{
382f4581
MB
236 struct orangefs_dir *od;
237 file->private_data = kmalloc(sizeof(struct orangefs_dir),
238 GFP_KERNEL);
5db11c21
MM
239 if (!file->private_data)
240 return -ENOMEM;
382f4581
MB
241 od = file->private_data;
242 od->token = ORANGEFS_READDIR_START;
243 /*
244 * XXX: It seems wasteful to allocate such a large buffer for
245 * each request. Most will be much smaller.
246 */
247 od->directory = alloc_pages_exact(MAX_DIRECTORY, GFP_KERNEL);
248 if (!od->directory) {
249 kfree(file->private_data);
250 return -ENOMEM;
251 }
382f4581
MB
252 od->len = 0;
253 od->error = 0;
5db11c21
MM
254 return 0;
255}
256
8bb8aefd 257static int orangefs_dir_release(struct inode *inode, struct file *file)
5db11c21 258{
382f4581 259 struct orangefs_dir *od = file->private_data;
8bb8aefd 260 orangefs_flush_inode(inode);
382f4581
MB
261 free_pages_exact(od->directory, MAX_DIRECTORY);
262 kfree(od);
5db11c21
MM
263 return 0;
264}
265
8bb8aefd 266const struct file_operations orangefs_dir_operations = {
72f66b83 267 .llseek = default_llseek,
5db11c21 268 .read = generic_read_dir,
382f4581 269 .iterate = orangefs_dir_iterate,
8bb8aefd 270 .open = orangefs_dir_open,
382f4581 271 .release = orangefs_dir_release
5db11c21 272};