blk-mq: really fix plug list flushing for nomerge queues
[linux-2.6-block.git] / fs / orangefs / dir.c
CommitLineData
5db11c21
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7#include "protocol.h"
575e9461
MM
8#include "orangefs-kernel.h"
9#include "orangefs-bufmap.h"
5db11c21 10
5db11c21 11/*
1808f8cc
MM
12 * decode routine used by kmod to deal with the blob sent from
13 * userspace for readdirs. The blob contains zero or more of these
14 * sub-blobs:
15 * __u32 - represents length of the character string that follows.
16 * string - between 1 and ORANGEFS_NAME_MAX bytes long.
17 * padding - (if needed) to cause the __u32 plus the string to be
18 * eight byte aligned.
19 * khandle - sizeof(khandle) bytes.
5db11c21 20 */
8092895f 21static long decode_dirents(char *ptr, size_t size,
8bb8aefd 22 struct orangefs_readdir_response_s *readdir)
5db11c21
MM
23{
24 int i;
8bb8aefd
YL
25 struct orangefs_readdir_response_s *rd =
26 (struct orangefs_readdir_response_s *) ptr;
5db11c21 27 char *buf = ptr;
1808f8cc
MM
28 int khandle_size = sizeof(struct orangefs_khandle);
29 size_t offset = offsetof(struct orangefs_readdir_response_s,
30 dirent_array);
31 /* 8 reflects eight byte alignment */
32 int smallest_blob = khandle_size + 8;
33 __u32 len;
34 int aligned_len;
35 int sizeof_u32 = sizeof(__u32);
36 long ret;
5db11c21 37
1808f8cc
MM
38 gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
39
40 /* size is = offset on empty dirs, > offset on non-empty dirs... */
41 if (size < offset) {
42 gossip_err("%s: size:%zu: offset:%zu:\n",
43 __func__,
44 size,
45 offset);
46 ret = -EINVAL;
47 goto out;
48 }
49
50 if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
51 gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
52 __func__,
53 size,
54 readdir->orangefs_dirent_outcount);
55 ret = -EINVAL;
56 goto out;
57 }
8092895f 58
5db11c21 59 readdir->token = rd->token;
8bb8aefd
YL
60 readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
61 readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
5db11c21
MM
62 sizeof(*readdir->dirent_array),
63 GFP_KERNEL);
1808f8cc
MM
64 if (readdir->dirent_array == NULL) {
65 gossip_err("%s: kcalloc failed.\n", __func__);
66 ret = -ENOMEM;
67 goto out;
68 }
8092895f 69
1808f8cc
MM
70 buf += offset;
71 size -= offset;
8092895f 72
8bb8aefd 73 for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
1808f8cc
MM
74 if (size < smallest_blob) {
75 gossip_err("%s: size:%zu: smallest_blob:%d:\n",
76 __func__,
77 size,
78 smallest_blob);
79 ret = -EINVAL;
80 goto free;
81 }
8092895f
AV
82
83 len = *(__u32 *)buf;
1808f8cc
MM
84 if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
85 gossip_err("%s: len:%d:\n", __func__, len);
86 ret = -EINVAL;
87 goto free;
88 }
89
90 gossip_debug(GOSSIP_DIR_DEBUG,
91 "%s: size:%zu: len:%d:\n",
92 __func__,
93 size,
94 len);
95
96 readdir->dirent_array[i].d_name = buf + sizeof_u32;
9be68b08 97 readdir->dirent_array[i].d_length = len;
8092895f 98
7d221485 99 /*
1808f8cc
MM
100 * Calculate "aligned" length of this string and its
101 * associated __u32 descriptor.
102 */
103 aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
104 gossip_debug(GOSSIP_DIR_DEBUG,
105 "%s: aligned_len:%d:\n",
106 __func__,
107 aligned_len);
108
109 /*
110 * The end of the blob should coincide with the end
111 * of the last sub-blob.
7d221485 112 */
1808f8cc
MM
113 if (size < aligned_len + khandle_size) {
114 gossip_err("%s: ran off the end of the blob.\n",
115 __func__);
116 ret = -EINVAL;
117 goto free;
118 }
119 size -= aligned_len + khandle_size;
8092895f 120
1808f8cc 121 buf += aligned_len;
8092895f 122
5db11c21 123 readdir->dirent_array[i].khandle =
8bb8aefd 124 *(struct orangefs_khandle *) buf;
1808f8cc 125 buf += khandle_size;
5db11c21 126 }
1808f8cc
MM
127 ret = buf - ptr;
128 gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
129 goto out;
130
131free:
8092895f
AV
132 kfree(readdir->dirent_array);
133 readdir->dirent_array = NULL;
1808f8cc
MM
134
135out:
136 return ret;
5db11c21
MM
137}
138
5db11c21
MM
139/*
140 * Read directory entries from an instance of an open directory.
5db11c21 141 */
8bb8aefd 142static int orangefs_readdir(struct file *file, struct dir_context *ctx)
5db11c21 143{
5db11c21
MM
144 int ret = 0;
145 int buffer_index;
88309aae
MM
146 /*
147 * ptoken supports Orangefs' distributed directory logic, added
148 * in 2.9.2.
149 */
5db11c21
MM
150 __u64 *ptoken = file->private_data;
151 __u64 pos = 0;
152 ino_t ino = 0;
153 struct dentry *dentry = file->f_path.dentry;
8bb8aefd
YL
154 struct orangefs_kernel_op_s *new_op = NULL;
155 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
9f5e2f7f
AV
156 struct orangefs_readdir_response_s readdir_response;
157 void *dents_buf;
5db11c21
MM
158 int i = 0;
159 int len = 0;
160 ino_t current_ino = 0;
161 char *current_entry = NULL;
162 long bytes_decoded;
163
88309aae
MM
164 gossip_debug(GOSSIP_DIR_DEBUG,
165 "%s: ctx->pos:%lld, ptoken = %llu\n",
166 __func__,
167 lld(ctx->pos),
168 llu(*ptoken));
5db11c21
MM
169
170 pos = (__u64) ctx->pos;
171
172 /* are we done? */
8bb8aefd 173 if (pos == ORANGEFS_READDIR_END) {
5db11c21
MM
174 gossip_debug(GOSSIP_DIR_DEBUG,
175 "Skipping to termination path\n");
176 return 0;
177 }
178
179 gossip_debug(GOSSIP_DIR_DEBUG,
8bb8aefd 180 "orangefs_readdir called on %s (pos=%llu)\n",
5db11c21
MM
181 dentry->d_name.name, llu(pos));
182
9f5e2f7f 183 memset(&readdir_response, 0, sizeof(readdir_response));
5db11c21 184
8bb8aefd 185 new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
5db11c21
MM
186 if (!new_op)
187 return -ENOMEM;
188
ee3b8d37
MB
189 /*
190 * Only the indices are shared. No memory is actually shared, but the
191 * mechanism is used.
192 */
5db11c21 193 new_op->uses_shared_memory = 1;
8bb8aefd 194 new_op->upcall.req.readdir.refn = orangefs_inode->refn;
7d221485
MB
195 new_op->upcall.req.readdir.max_dirent_count =
196 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
5db11c21
MM
197
198 gossip_debug(GOSSIP_DIR_DEBUG,
199 "%s: upcall.req.readdir.refn.khandle: %pU\n",
200 __func__,
201 &new_op->upcall.req.readdir.refn.khandle);
202
5db11c21
MM
203 new_op->upcall.req.readdir.token = *ptoken;
204
205get_new_buffer_index:
b8a99a8f
AV
206 buffer_index = orangefs_readdir_index_get();
207 if (buffer_index < 0) {
208 ret = buffer_index;
7d221485 209 gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
5db11c21
MM
210 ret);
211 goto out_free_op;
212 }
213 new_op->upcall.req.readdir.buf_index = buffer_index;
214
215 ret = service_operation(new_op,
8bb8aefd 216 "orangefs_readdir",
5db11c21
MM
217 get_interruptible_flag(dentry->d_inode));
218
219 gossip_debug(GOSSIP_DIR_DEBUG,
220 "Readdir downcall status is %d. ret:%d\n",
221 new_op->downcall.status,
222 ret);
223
ee3b8d37
MB
224 orangefs_readdir_index_put(buffer_index);
225
5db11c21 226 if (ret == -EAGAIN && op_state_purged(new_op)) {
ee3b8d37 227 /* Client-core indices are invalid after it restarted. */
5db11c21
MM
228 gossip_debug(GOSSIP_DIR_DEBUG,
229 "%s: Getting new buffer_index for retry of readdir..\n",
230 __func__);
5db11c21
MM
231 goto get_new_buffer_index;
232 }
233
234 if (ret == -EIO && op_state_purged(new_op)) {
235 gossip_err("%s: Client is down. Aborting readdir call.\n",
236 __func__);
641bb324 237 goto out_free_op;
5db11c21
MM
238 }
239
240 if (ret < 0 || new_op->downcall.status != 0) {
241 gossip_debug(GOSSIP_DIR_DEBUG,
242 "Readdir request failed. Status:%d\n",
243 new_op->downcall.status);
5db11c21
MM
244 if (ret >= 0)
245 ret = new_op->downcall.status;
641bb324 246 goto out_free_op;
9f5e2f7f
AV
247 }
248
249 dents_buf = new_op->downcall.trailer_buf;
250 if (dents_buf == NULL) {
251 gossip_err("Invalid NULL buffer in readdir response\n");
252 ret = -ENOMEM;
641bb324 253 goto out_free_op;
5db11c21
MM
254 }
255
9f5e2f7f
AV
256 bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
257 &readdir_response);
5db11c21 258 if (bytes_decoded < 0) {
5db11c21 259 ret = bytes_decoded;
9f5e2f7f
AV
260 gossip_err("Could not decode readdir from buffer %d\n", ret);
261 goto out_vfree;
5db11c21
MM
262 }
263
264 if (bytes_decoded != new_op->downcall.trailer_size) {
8bb8aefd 265 gossip_err("orangefs_readdir: # bytes decoded (%ld) "
88309aae
MM
266 "!= trailer size (%ld)\n",
267 bytes_decoded,
268 (long)new_op->downcall.trailer_size);
5db11c21
MM
269 ret = -EINVAL;
270 goto out_destroy_handle;
271 }
272
88309aae 273 /*
8bb8aefd 274 * orangefs doesn't actually store dot and dot-dot, but
88309aae
MM
275 * we need to have them represented.
276 */
5db11c21
MM
277 if (pos == 0) {
278 ino = get_ino_from_khandle(dentry->d_inode);
279 gossip_debug(GOSSIP_DIR_DEBUG,
280 "%s: calling dir_emit of \".\" with pos = %llu\n",
281 __func__,
282 llu(pos));
283 ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
88309aae 284 pos += 1;
5db11c21
MM
285 }
286
287 if (pos == 1) {
288 ino = get_parent_ino_from_dentry(dentry);
289 gossip_debug(GOSSIP_DIR_DEBUG,
290 "%s: calling dir_emit of \"..\" with pos = %llu\n",
291 __func__,
292 llu(pos));
293 ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
88309aae 294 pos += 1;
5db11c21
MM
295 }
296
88309aae 297 /*
8bb8aefd 298 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
88309aae
MM
299 * to prevent "finding" dot and dot-dot on any iteration
300 * other than the first.
301 */
8bb8aefd 302 if (ctx->pos == ORANGEFS_ITERATE_NEXT)
88309aae
MM
303 ctx->pos = 0;
304
cf07c0bf
MM
305 gossip_debug(GOSSIP_DIR_DEBUG,
306 "%s: dirent_outcount:%d:\n",
307 __func__,
9f5e2f7f 308 readdir_response.orangefs_dirent_outcount);
88309aae 309 for (i = ctx->pos;
9f5e2f7f 310 i < readdir_response.orangefs_dirent_outcount;
88309aae 311 i++) {
9f5e2f7f
AV
312 len = readdir_response.dirent_array[i].d_length;
313 current_entry = readdir_response.dirent_array[i].d_name;
8bb8aefd 314 current_ino = orangefs_khandle_to_ino(
9f5e2f7f 315 &readdir_response.dirent_array[i].khandle);
5db11c21
MM
316
317 gossip_debug(GOSSIP_DIR_DEBUG,
88309aae
MM
318 "calling dir_emit for %s with len %d"
319 ", ctx->pos %ld\n",
5db11c21
MM
320 current_entry,
321 len,
88309aae
MM
322 (unsigned long)ctx->pos);
323 /*
324 * type is unknown. We don't return object type
325 * in the dirent_array. This leaves getdents
326 * clueless about type.
327 */
5db11c21
MM
328 ret =
329 dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
88309aae
MM
330 if (!ret)
331 break;
5db11c21 332 ctx->pos++;
88309aae 333 gossip_debug(GOSSIP_DIR_DEBUG,
5db11c21
MM
334 "%s: ctx->pos:%lld\n",
335 __func__,
336 lld(ctx->pos));
337
5db11c21
MM
338 }
339
54804949 340 /*
88309aae
MM
341 * we ran all the way through the last batch, set up for
342 * getting another batch...
343 */
344 if (ret) {
9f5e2f7f 345 *ptoken = readdir_response.token;
8bb8aefd 346 ctx->pos = ORANGEFS_ITERATE_NEXT;
5db11c21
MM
347 }
348
349 /*
350 * Did we hit the end of the directory?
351 */
e56f4981 352 if (readdir_response.token == ORANGEFS_READDIR_END) {
88309aae 353 gossip_debug(GOSSIP_DIR_DEBUG,
8bb8aefd
YL
354 "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
355 ctx->pos = ORANGEFS_READDIR_END;
5db11c21
MM
356 }
357
5db11c21 358out_destroy_handle:
9f5e2f7f
AV
359 /* kfree(NULL) is safe */
360 kfree(readdir_response.dirent_array);
361out_vfree:
362 gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
363 vfree(dents_buf);
5db11c21
MM
364out_free_op:
365 op_release(new_op);
8bb8aefd 366 gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
5db11c21
MM
367 return ret;
368}
369
8bb8aefd 370static int orangefs_dir_open(struct inode *inode, struct file *file)
5db11c21
MM
371{
372 __u64 *ptoken;
373
374 file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
375 if (!file->private_data)
376 return -ENOMEM;
377
378 ptoken = file->private_data;
8bb8aefd 379 *ptoken = ORANGEFS_READDIR_START;
5db11c21
MM
380 return 0;
381}
382
8bb8aefd 383static int orangefs_dir_release(struct inode *inode, struct file *file)
5db11c21 384{
8bb8aefd 385 orangefs_flush_inode(inode);
5db11c21
MM
386 kfree(file->private_data);
387 return 0;
388}
389
8bb8aefd
YL
390/** ORANGEFS implementation of VFS directory operations */
391const struct file_operations orangefs_dir_operations = {
5db11c21 392 .read = generic_read_dir,
8bb8aefd
YL
393 .iterate = orangefs_readdir,
394 .open = orangefs_dir_open,
395 .release = orangefs_dir_release,
5db11c21 396};