Orangefs: kernel client part 2
[linux-2.6-block.git] / fs / orangefs / dir.c
CommitLineData
5db11c21
MM
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7#include "protocol.h"
8#include "pvfs2-kernel.h"
9#include "pvfs2-bufmap.h"
10
11struct readdir_handle_s {
12 int buffer_index;
13 struct pvfs2_readdir_response_s readdir_response;
14 void *dents_buf;
15};
16
17/*
18 * decode routine needed by kmod to make sense of the shared page for readdirs.
19 */
20static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir)
21{
22 int i;
23 struct pvfs2_readdir_response_s *rd =
24 (struct pvfs2_readdir_response_s *) ptr;
25 char *buf = ptr;
26 char **pptr = &buf;
27
28 readdir->token = rd->token;
29 readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount;
30 readdir->dirent_array = kmalloc(readdir->pvfs_dirent_outcount *
31 sizeof(*readdir->dirent_array),
32 GFP_KERNEL);
33 if (readdir->dirent_array == NULL)
34 return -ENOMEM;
35 *pptr += offsetof(struct pvfs2_readdir_response_s, dirent_array);
36 for (i = 0; i < readdir->pvfs_dirent_outcount; i++) {
37 dec_string(pptr, &readdir->dirent_array[i].d_name,
38 &readdir->dirent_array[i].d_length);
39 readdir->dirent_array[i].khandle =
40 *(struct pvfs2_khandle *) *pptr;
41 *pptr += 16;
42 }
43 return (unsigned long)*pptr - (unsigned long)ptr;
44}
45
46static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf,
47 int buffer_index)
48{
49 long ret;
50
51 if (buf == NULL) {
52 gossip_err
53 ("Invalid NULL buffer specified in readdir_handle_ctor\n");
54 return -ENOMEM;
55 }
56 if (buffer_index < 0) {
57 gossip_err
58 ("Invalid buffer index specified in readdir_handle_ctor\n");
59 return -EINVAL;
60 }
61 rhandle->buffer_index = buffer_index;
62 rhandle->dents_buf = buf;
63 ret = decode_dirents(buf, &rhandle->readdir_response);
64 if (ret < 0) {
65 gossip_err("Could not decode readdir from buffer %ld\n", ret);
66 rhandle->buffer_index = -1;
67 gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf);
68 vfree(buf);
69 rhandle->dents_buf = NULL;
70 }
71 return ret;
72}
73
74static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap,
75 struct readdir_handle_s *rhandle)
76{
77 if (rhandle == NULL)
78 return;
79
80 /* kfree(NULL) is safe */
81 kfree(rhandle->readdir_response.dirent_array);
82 rhandle->readdir_response.dirent_array = NULL;
83
84 if (rhandle->buffer_index >= 0) {
85 readdir_index_put(bufmap, rhandle->buffer_index);
86 rhandle->buffer_index = -1;
87 }
88 if (rhandle->dents_buf) {
89 gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n",
90 rhandle->dents_buf);
91 vfree(rhandle->dents_buf);
92 rhandle->dents_buf = NULL;
93 }
94}
95
96/*
97 * Read directory entries from an instance of an open directory.
98 *
99 * \note This routine was converted for the readdir to iterate change
100 * in "struct file_operations". "converted" mostly amounts to
101 * changing occurrences of "readdir" and "filldir" in the
102 * comments to "iterate" and "dir_emit". Also filldir calls
103 * were changed to dir_emit calls.
104 *
105 * \param dir_emit callback function called for each entry read.
106 *
107 * \retval <0 on error
108 * \retval 0 when directory has been completely traversed
109 * \retval >0 if we don't call dir_emit for all entries
110 *
111 * \note If the dir_emit call-back returns non-zero, then iterate should
112 * assume that it has had enough, and should return as well.
113 */
114static int pvfs2_readdir(struct file *file, struct dir_context *ctx)
115{
116 struct pvfs2_bufmap *bufmap = NULL;
117 int ret = 0;
118 int buffer_index;
119 __u64 *ptoken = file->private_data;
120 __u64 pos = 0;
121 ino_t ino = 0;
122 struct dentry *dentry = file->f_path.dentry;
123 struct pvfs2_kernel_op_s *new_op = NULL;
124 struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode);
125 int buffer_full = 0;
126 struct readdir_handle_s rhandle;
127 int i = 0;
128 int len = 0;
129 ino_t current_ino = 0;
130 char *current_entry = NULL;
131 long bytes_decoded;
132
133 gossip_ldebug(GOSSIP_DIR_DEBUG,
134 "%s: ctx->pos:%lld, token = %llu\n",
135 __func__,
136 lld(ctx->pos),
137 llu(*ptoken));
138
139 pos = (__u64) ctx->pos;
140
141 /* are we done? */
142 if (pos == PVFS_READDIR_END) {
143 gossip_debug(GOSSIP_DIR_DEBUG,
144 "Skipping to termination path\n");
145 return 0;
146 }
147
148 gossip_debug(GOSSIP_DIR_DEBUG,
149 "pvfs2_readdir called on %s (pos=%llu)\n",
150 dentry->d_name.name, llu(pos));
151
152 rhandle.buffer_index = -1;
153 rhandle.dents_buf = NULL;
154 memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response));
155
156 new_op = op_alloc(PVFS2_VFS_OP_READDIR);
157 if (!new_op)
158 return -ENOMEM;
159
160 new_op->uses_shared_memory = 1;
161 new_op->upcall.req.readdir.refn = pvfs2_inode->refn;
162 new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR;
163
164 gossip_debug(GOSSIP_DIR_DEBUG,
165 "%s: upcall.req.readdir.refn.khandle: %pU\n",
166 __func__,
167 &new_op->upcall.req.readdir.refn.khandle);
168
169 /*
170 * NOTE: the position we send to the readdir upcall is out of
171 * sync with ctx->pos since:
172 * 1. pvfs2 doesn't include the "." and ".." entries that are
173 * added below.
174 * 2. the introduction of distributed directory logic makes token no
175 * longer be related to f_pos and pos. Instead an independent
176 * variable is used inside the function and stored in the
177 * private_data of the file structure.
178 */
179 new_op->upcall.req.readdir.token = *ptoken;
180
181get_new_buffer_index:
182 ret = readdir_index_get(&bufmap, &buffer_index);
183 if (ret < 0) {
184 gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n",
185 ret);
186 goto out_free_op;
187 }
188 new_op->upcall.req.readdir.buf_index = buffer_index;
189
190 ret = service_operation(new_op,
191 "pvfs2_readdir",
192 get_interruptible_flag(dentry->d_inode));
193
194 gossip_debug(GOSSIP_DIR_DEBUG,
195 "Readdir downcall status is %d. ret:%d\n",
196 new_op->downcall.status,
197 ret);
198
199 if (ret == -EAGAIN && op_state_purged(new_op)) {
200 /*
201 * readdir shared memory aread has been wiped due to
202 * pvfs2-client-core restarting, so we must get a new
203 * index into the shared memory.
204 */
205 gossip_debug(GOSSIP_DIR_DEBUG,
206 "%s: Getting new buffer_index for retry of readdir..\n",
207 __func__);
208 readdir_index_put(bufmap, buffer_index);
209 goto get_new_buffer_index;
210 }
211
212 if (ret == -EIO && op_state_purged(new_op)) {
213 gossip_err("%s: Client is down. Aborting readdir call.\n",
214 __func__);
215 readdir_index_put(bufmap, buffer_index);
216 goto out_free_op;
217 }
218
219 if (ret < 0 || new_op->downcall.status != 0) {
220 gossip_debug(GOSSIP_DIR_DEBUG,
221 "Readdir request failed. Status:%d\n",
222 new_op->downcall.status);
223 readdir_index_put(bufmap, buffer_index);
224 if (ret >= 0)
225 ret = new_op->downcall.status;
226 goto out_free_op;
227 }
228
229 bytes_decoded =
230 readdir_handle_ctor(&rhandle,
231 new_op->downcall.trailer_buf,
232 buffer_index);
233 if (bytes_decoded < 0) {
234 gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n",
235 ret);
236 ret = bytes_decoded;
237 readdir_index_put(bufmap, buffer_index);
238 goto out_free_op;
239 }
240
241 if (bytes_decoded != new_op->downcall.trailer_size) {
242 gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n",
243 bytes_decoded,
244 (long)new_op->downcall.trailer_size);
245 ret = -EINVAL;
246 goto out_destroy_handle;
247 }
248
249 if (pos == 0) {
250 ino = get_ino_from_khandle(dentry->d_inode);
251 gossip_debug(GOSSIP_DIR_DEBUG,
252 "%s: calling dir_emit of \".\" with pos = %llu\n",
253 __func__,
254 llu(pos));
255 ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
256 if (ret < 0)
257 goto out_destroy_handle;
258 ctx->pos++;
259 gossip_ldebug(GOSSIP_DIR_DEBUG,
260 "%s: ctx->pos:%lld\n",
261 __func__,
262 lld(ctx->pos));
263 pos++;
264 }
265
266 if (pos == 1) {
267 ino = get_parent_ino_from_dentry(dentry);
268 gossip_debug(GOSSIP_DIR_DEBUG,
269 "%s: calling dir_emit of \"..\" with pos = %llu\n",
270 __func__,
271 llu(pos));
272 ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
273 if (ret < 0)
274 goto out_destroy_handle;
275 ctx->pos++;
276 gossip_ldebug(GOSSIP_DIR_DEBUG,
277 "%s: ctx->pos:%lld\n",
278 __func__,
279 lld(ctx->pos));
280 pos++;
281 }
282
283 for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) {
284 len = rhandle.readdir_response.dirent_array[i].d_length;
285 current_entry = rhandle.readdir_response.dirent_array[i].d_name;
286 current_ino = pvfs2_khandle_to_ino(
287 &(rhandle.readdir_response.dirent_array[i].khandle));
288
289 gossip_debug(GOSSIP_DIR_DEBUG,
290 "calling dir_emit for %s with len %d, pos %ld\n",
291 current_entry,
292 len,
293 (unsigned long)pos);
294 ret =
295 dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
296 if (ret < 0) {
297 gossip_debug(GOSSIP_DIR_DEBUG,
298 "dir_emit() failed. ret:%d\n",
299 ret);
300 if (i < 2) {
301 gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n");
302 gossip_err("Duplicate entries may appear.\n");
303 }
304 buffer_full = 1;
305 break;
306 }
307 ctx->pos++;
308 gossip_ldebug(GOSSIP_DIR_DEBUG,
309 "%s: ctx->pos:%lld\n",
310 __func__,
311 lld(ctx->pos));
312
313 pos++;
314 }
315
316 /* this means that all of the dir_emit calls succeeded */
317 if (i == rhandle.readdir_response.pvfs_dirent_outcount) {
318 /* update token */
319 *ptoken = rhandle.readdir_response.token;
320 } else {
321 /* this means a dir_emit call failed */
322 if (rhandle.readdir_response.token == PVFS_READDIR_END) {
323 /*
324 * If PVFS hit end of directory, then there
325 * is no way to do math on the token that it
326 * returned. Instead we go by ctx->pos but
327 * back up to account for the artificial .
328 * and .. entries.
329 */
330 ctx->pos -= 3;
331 } else {
332 /*
333 * this means a dir_emit call failed. !!! need to set
334 * back to previous ctx->pos, no middle value allowed
335 */
336 pos -= (i - 1);
337 ctx->pos -= (i - 1);
338 }
339 gossip_debug(GOSSIP_DIR_DEBUG,
340 "at least one dir_emit call failed. Setting ctx->pos to: %lld\n",
341 lld(ctx->pos));
342 }
343
344 /*
345 * Did we hit the end of the directory?
346 */
347 if (rhandle.readdir_response.token == PVFS_READDIR_END &&
348 !buffer_full) {
349 gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n");
350 ctx->pos = PVFS_READDIR_END;
351 }
352
353 gossip_debug(GOSSIP_DIR_DEBUG,
354 "pos = %llu, token = %llu"
355 ", ctx->pos should have been %lld\n",
356 llu(pos),
357 llu(*ptoken),
358 lld(ctx->pos));
359
360out_destroy_handle:
361 readdir_handle_dtor(bufmap, &rhandle);
362out_free_op:
363 op_release(new_op);
364 gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret);
365 return ret;
366}
367
368static int pvfs2_dir_open(struct inode *inode, struct file *file)
369{
370 __u64 *ptoken;
371
372 file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
373 if (!file->private_data)
374 return -ENOMEM;
375
376 ptoken = file->private_data;
377 *ptoken = PVFS_READDIR_START;
378 return 0;
379}
380
381static int pvfs2_dir_release(struct inode *inode, struct file *file)
382{
383 pvfs2_flush_inode(inode);
384 kfree(file->private_data);
385 return 0;
386}
387
388/** PVFS2 implementation of VFS directory operations */
389const struct file_operations pvfs2_dir_operations = {
390 .read = generic_read_dir,
391 .iterate = pvfs2_readdir,
392 .open = pvfs2_dir_open,
393 .release = pvfs2_dir_release,
394};