Commit | Line | Data |
---|---|---|
5db11c21 MM |
1 | /* |
2 | * (C) 2001 Clemson University and The University of Chicago | |
3 | * | |
4 | * See COPYING in top-level directory. | |
5 | */ | |
6 | ||
7 | #include "protocol.h" | |
8 | #include "pvfs2-kernel.h" | |
9 | #include "pvfs2-bufmap.h" | |
10 | ||
/*
 * Bookkeeping for one readdir request: the resources acquired while
 * servicing it, so that readdir_handle_dtor() can release all of them.
 */
struct readdir_handle_s {
	/*
	 * Readdir buffer slot handed back through readdir_index_put() by
	 * readdir_handle_dtor(); -1 when no slot is held.
	 */
	int buffer_index;
	/*
	 * Decoded response. Its dirent_array is allocated by
	 * decode_dirents() and kfree'd by readdir_handle_dtor().
	 */
	struct pvfs2_readdir_response_s readdir_response;
	/*
	 * The raw, still encoded response buffer. It is vfree'd when
	 * decoding fails or when the handle is destroyed; NULL when no
	 * buffer is held.
	 */
	void *dents_buf;
};
16 | ||
17 | /* | |
18 | * decode routine needed by kmod to make sense of the shared page for readdirs. | |
19 | */ | |
20 | static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) | |
21 | { | |
22 | int i; | |
23 | struct pvfs2_readdir_response_s *rd = | |
24 | (struct pvfs2_readdir_response_s *) ptr; | |
25 | char *buf = ptr; | |
26 | char **pptr = &buf; | |
27 | ||
28 | readdir->token = rd->token; | |
29 | readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; | |
30 | readdir->dirent_array = kmalloc(readdir->pvfs_dirent_outcount * | |
31 | sizeof(*readdir->dirent_array), | |
32 | GFP_KERNEL); | |
33 | if (readdir->dirent_array == NULL) | |
34 | return -ENOMEM; | |
35 | *pptr += offsetof(struct pvfs2_readdir_response_s, dirent_array); | |
36 | for (i = 0; i < readdir->pvfs_dirent_outcount; i++) { | |
37 | dec_string(pptr, &readdir->dirent_array[i].d_name, | |
38 | &readdir->dirent_array[i].d_length); | |
39 | readdir->dirent_array[i].khandle = | |
40 | *(struct pvfs2_khandle *) *pptr; | |
41 | *pptr += 16; | |
42 | } | |
43 | return (unsigned long)*pptr - (unsigned long)ptr; | |
44 | } | |
45 | ||
46 | static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, | |
47 | int buffer_index) | |
48 | { | |
49 | long ret; | |
50 | ||
51 | if (buf == NULL) { | |
52 | gossip_err | |
53 | ("Invalid NULL buffer specified in readdir_handle_ctor\n"); | |
54 | return -ENOMEM; | |
55 | } | |
56 | if (buffer_index < 0) { | |
57 | gossip_err | |
58 | ("Invalid buffer index specified in readdir_handle_ctor\n"); | |
59 | return -EINVAL; | |
60 | } | |
61 | rhandle->buffer_index = buffer_index; | |
62 | rhandle->dents_buf = buf; | |
63 | ret = decode_dirents(buf, &rhandle->readdir_response); | |
64 | if (ret < 0) { | |
65 | gossip_err("Could not decode readdir from buffer %ld\n", ret); | |
66 | rhandle->buffer_index = -1; | |
67 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); | |
68 | vfree(buf); | |
69 | rhandle->dents_buf = NULL; | |
70 | } | |
71 | return ret; | |
72 | } | |
73 | ||
74 | static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, | |
75 | struct readdir_handle_s *rhandle) | |
76 | { | |
77 | if (rhandle == NULL) | |
78 | return; | |
79 | ||
80 | /* kfree(NULL) is safe */ | |
81 | kfree(rhandle->readdir_response.dirent_array); | |
82 | rhandle->readdir_response.dirent_array = NULL; | |
83 | ||
84 | if (rhandle->buffer_index >= 0) { | |
85 | readdir_index_put(bufmap, rhandle->buffer_index); | |
86 | rhandle->buffer_index = -1; | |
87 | } | |
88 | if (rhandle->dents_buf) { | |
89 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", | |
90 | rhandle->dents_buf); | |
91 | vfree(rhandle->dents_buf); | |
92 | rhandle->dents_buf = NULL; | |
93 | } | |
94 | } | |
95 | ||
96 | /* | |
97 | * Read directory entries from an instance of an open directory. | |
98 | * | |
99 | * \note This routine was converted for the readdir to iterate change | |
100 | * in "struct file_operations". "converted" mostly amounts to | |
101 | * changing occurrences of "readdir" and "filldir" in the | |
102 | * comments to "iterate" and "dir_emit". Also filldir calls | |
103 | * were changed to dir_emit calls. | |
104 | * | |
105 | * \param dir_emit callback function called for each entry read. | |
106 | * | |
107 | * \retval <0 on error | |
108 | * \retval 0 when directory has been completely traversed | |
109 | * \retval >0 if we don't call dir_emit for all entries | |
110 | * | |
111 | * \note If the dir_emit call-back returns non-zero, then iterate should | |
112 | * assume that it has had enough, and should return as well. | |
113 | */ | |
114 | static int pvfs2_readdir(struct file *file, struct dir_context *ctx) | |
115 | { | |
116 | struct pvfs2_bufmap *bufmap = NULL; | |
117 | int ret = 0; | |
118 | int buffer_index; | |
119 | __u64 *ptoken = file->private_data; | |
120 | __u64 pos = 0; | |
121 | ino_t ino = 0; | |
122 | struct dentry *dentry = file->f_path.dentry; | |
123 | struct pvfs2_kernel_op_s *new_op = NULL; | |
124 | struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode); | |
125 | int buffer_full = 0; | |
126 | struct readdir_handle_s rhandle; | |
127 | int i = 0; | |
128 | int len = 0; | |
129 | ino_t current_ino = 0; | |
130 | char *current_entry = NULL; | |
131 | long bytes_decoded; | |
132 | ||
133 | gossip_ldebug(GOSSIP_DIR_DEBUG, | |
134 | "%s: ctx->pos:%lld, token = %llu\n", | |
135 | __func__, | |
136 | lld(ctx->pos), | |
137 | llu(*ptoken)); | |
138 | ||
139 | pos = (__u64) ctx->pos; | |
140 | ||
141 | /* are we done? */ | |
142 | if (pos == PVFS_READDIR_END) { | |
143 | gossip_debug(GOSSIP_DIR_DEBUG, | |
144 | "Skipping to termination path\n"); | |
145 | return 0; | |
146 | } | |
147 | ||
148 | gossip_debug(GOSSIP_DIR_DEBUG, | |
149 | "pvfs2_readdir called on %s (pos=%llu)\n", | |
150 | dentry->d_name.name, llu(pos)); | |
151 | ||
152 | rhandle.buffer_index = -1; | |
153 | rhandle.dents_buf = NULL; | |
154 | memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); | |
155 | ||
156 | new_op = op_alloc(PVFS2_VFS_OP_READDIR); | |
157 | if (!new_op) | |
158 | return -ENOMEM; | |
159 | ||
160 | new_op->uses_shared_memory = 1; | |
161 | new_op->upcall.req.readdir.refn = pvfs2_inode->refn; | |
162 | new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; | |
163 | ||
164 | gossip_debug(GOSSIP_DIR_DEBUG, | |
165 | "%s: upcall.req.readdir.refn.khandle: %pU\n", | |
166 | __func__, | |
167 | &new_op->upcall.req.readdir.refn.khandle); | |
168 | ||
169 | /* | |
170 | * NOTE: the position we send to the readdir upcall is out of | |
171 | * sync with ctx->pos since: | |
172 | * 1. pvfs2 doesn't include the "." and ".." entries that are | |
173 | * added below. | |
174 | * 2. the introduction of distributed directory logic makes token no | |
175 | * longer be related to f_pos and pos. Instead an independent | |
176 | * variable is used inside the function and stored in the | |
177 | * private_data of the file structure. | |
178 | */ | |
179 | new_op->upcall.req.readdir.token = *ptoken; | |
180 | ||
181 | get_new_buffer_index: | |
182 | ret = readdir_index_get(&bufmap, &buffer_index); | |
183 | if (ret < 0) { | |
184 | gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n", | |
185 | ret); | |
186 | goto out_free_op; | |
187 | } | |
188 | new_op->upcall.req.readdir.buf_index = buffer_index; | |
189 | ||
190 | ret = service_operation(new_op, | |
191 | "pvfs2_readdir", | |
192 | get_interruptible_flag(dentry->d_inode)); | |
193 | ||
194 | gossip_debug(GOSSIP_DIR_DEBUG, | |
195 | "Readdir downcall status is %d. ret:%d\n", | |
196 | new_op->downcall.status, | |
197 | ret); | |
198 | ||
199 | if (ret == -EAGAIN && op_state_purged(new_op)) { | |
200 | /* | |
201 | * readdir shared memory aread has been wiped due to | |
202 | * pvfs2-client-core restarting, so we must get a new | |
203 | * index into the shared memory. | |
204 | */ | |
205 | gossip_debug(GOSSIP_DIR_DEBUG, | |
206 | "%s: Getting new buffer_index for retry of readdir..\n", | |
207 | __func__); | |
208 | readdir_index_put(bufmap, buffer_index); | |
209 | goto get_new_buffer_index; | |
210 | } | |
211 | ||
212 | if (ret == -EIO && op_state_purged(new_op)) { | |
213 | gossip_err("%s: Client is down. Aborting readdir call.\n", | |
214 | __func__); | |
215 | readdir_index_put(bufmap, buffer_index); | |
216 | goto out_free_op; | |
217 | } | |
218 | ||
219 | if (ret < 0 || new_op->downcall.status != 0) { | |
220 | gossip_debug(GOSSIP_DIR_DEBUG, | |
221 | "Readdir request failed. Status:%d\n", | |
222 | new_op->downcall.status); | |
223 | readdir_index_put(bufmap, buffer_index); | |
224 | if (ret >= 0) | |
225 | ret = new_op->downcall.status; | |
226 | goto out_free_op; | |
227 | } | |
228 | ||
229 | bytes_decoded = | |
230 | readdir_handle_ctor(&rhandle, | |
231 | new_op->downcall.trailer_buf, | |
232 | buffer_index); | |
233 | if (bytes_decoded < 0) { | |
234 | gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n", | |
235 | ret); | |
236 | ret = bytes_decoded; | |
237 | readdir_index_put(bufmap, buffer_index); | |
238 | goto out_free_op; | |
239 | } | |
240 | ||
241 | if (bytes_decoded != new_op->downcall.trailer_size) { | |
242 | gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n", | |
243 | bytes_decoded, | |
244 | (long)new_op->downcall.trailer_size); | |
245 | ret = -EINVAL; | |
246 | goto out_destroy_handle; | |
247 | } | |
248 | ||
249 | if (pos == 0) { | |
250 | ino = get_ino_from_khandle(dentry->d_inode); | |
251 | gossip_debug(GOSSIP_DIR_DEBUG, | |
252 | "%s: calling dir_emit of \".\" with pos = %llu\n", | |
253 | __func__, | |
254 | llu(pos)); | |
255 | ret = dir_emit(ctx, ".", 1, ino, DT_DIR); | |
256 | if (ret < 0) | |
257 | goto out_destroy_handle; | |
258 | ctx->pos++; | |
259 | gossip_ldebug(GOSSIP_DIR_DEBUG, | |
260 | "%s: ctx->pos:%lld\n", | |
261 | __func__, | |
262 | lld(ctx->pos)); | |
263 | pos++; | |
264 | } | |
265 | ||
266 | if (pos == 1) { | |
267 | ino = get_parent_ino_from_dentry(dentry); | |
268 | gossip_debug(GOSSIP_DIR_DEBUG, | |
269 | "%s: calling dir_emit of \"..\" with pos = %llu\n", | |
270 | __func__, | |
271 | llu(pos)); | |
272 | ret = dir_emit(ctx, "..", 2, ino, DT_DIR); | |
273 | if (ret < 0) | |
274 | goto out_destroy_handle; | |
275 | ctx->pos++; | |
276 | gossip_ldebug(GOSSIP_DIR_DEBUG, | |
277 | "%s: ctx->pos:%lld\n", | |
278 | __func__, | |
279 | lld(ctx->pos)); | |
280 | pos++; | |
281 | } | |
282 | ||
283 | for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) { | |
284 | len = rhandle.readdir_response.dirent_array[i].d_length; | |
285 | current_entry = rhandle.readdir_response.dirent_array[i].d_name; | |
286 | current_ino = pvfs2_khandle_to_ino( | |
287 | &(rhandle.readdir_response.dirent_array[i].khandle)); | |
288 | ||
289 | gossip_debug(GOSSIP_DIR_DEBUG, | |
290 | "calling dir_emit for %s with len %d, pos %ld\n", | |
291 | current_entry, | |
292 | len, | |
293 | (unsigned long)pos); | |
294 | ret = | |
295 | dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); | |
296 | if (ret < 0) { | |
297 | gossip_debug(GOSSIP_DIR_DEBUG, | |
298 | "dir_emit() failed. ret:%d\n", | |
299 | ret); | |
300 | if (i < 2) { | |
301 | gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n"); | |
302 | gossip_err("Duplicate entries may appear.\n"); | |
303 | } | |
304 | buffer_full = 1; | |
305 | break; | |
306 | } | |
307 | ctx->pos++; | |
308 | gossip_ldebug(GOSSIP_DIR_DEBUG, | |
309 | "%s: ctx->pos:%lld\n", | |
310 | __func__, | |
311 | lld(ctx->pos)); | |
312 | ||
313 | pos++; | |
314 | } | |
315 | ||
316 | /* this means that all of the dir_emit calls succeeded */ | |
317 | if (i == rhandle.readdir_response.pvfs_dirent_outcount) { | |
318 | /* update token */ | |
319 | *ptoken = rhandle.readdir_response.token; | |
320 | } else { | |
321 | /* this means a dir_emit call failed */ | |
322 | if (rhandle.readdir_response.token == PVFS_READDIR_END) { | |
323 | /* | |
324 | * If PVFS hit end of directory, then there | |
325 | * is no way to do math on the token that it | |
326 | * returned. Instead we go by ctx->pos but | |
327 | * back up to account for the artificial . | |
328 | * and .. entries. | |
329 | */ | |
330 | ctx->pos -= 3; | |
331 | } else { | |
332 | /* | |
333 | * this means a dir_emit call failed. !!! need to set | |
334 | * back to previous ctx->pos, no middle value allowed | |
335 | */ | |
336 | pos -= (i - 1); | |
337 | ctx->pos -= (i - 1); | |
338 | } | |
339 | gossip_debug(GOSSIP_DIR_DEBUG, | |
340 | "at least one dir_emit call failed. Setting ctx->pos to: %lld\n", | |
341 | lld(ctx->pos)); | |
342 | } | |
343 | ||
344 | /* | |
345 | * Did we hit the end of the directory? | |
346 | */ | |
347 | if (rhandle.readdir_response.token == PVFS_READDIR_END && | |
348 | !buffer_full) { | |
349 | gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); | |
350 | ctx->pos = PVFS_READDIR_END; | |
351 | } | |
352 | ||
353 | gossip_debug(GOSSIP_DIR_DEBUG, | |
354 | "pos = %llu, token = %llu" | |
355 | ", ctx->pos should have been %lld\n", | |
356 | llu(pos), | |
357 | llu(*ptoken), | |
358 | lld(ctx->pos)); | |
359 | ||
360 | out_destroy_handle: | |
361 | readdir_handle_dtor(bufmap, &rhandle); | |
362 | out_free_op: | |
363 | op_release(new_op); | |
364 | gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret); | |
365 | return ret; | |
366 | } | |
367 | ||
368 | static int pvfs2_dir_open(struct inode *inode, struct file *file) | |
369 | { | |
370 | __u64 *ptoken; | |
371 | ||
372 | file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); | |
373 | if (!file->private_data) | |
374 | return -ENOMEM; | |
375 | ||
376 | ptoken = file->private_data; | |
377 | *ptoken = PVFS_READDIR_START; | |
378 | return 0; | |
379 | } | |
380 | ||
381 | static int pvfs2_dir_release(struct inode *inode, struct file *file) | |
382 | { | |
383 | pvfs2_flush_inode(inode); | |
384 | kfree(file->private_data); | |
385 | return 0; | |
386 | } | |
387 | ||
388 | /** PVFS2 implementation of VFS directory operations */ | |
389 | const struct file_operations pvfs2_dir_operations = { | |
390 | .read = generic_read_dir, | |
391 | .iterate = pvfs2_readdir, | |
392 | .open = pvfs2_dir_open, | |
393 | .release = pvfs2_dir_release, | |
394 | }; |