Commit | Line | Data |
---|---|---|
5db11c21 MM |
1 | /* |
2 | * (C) 2001 Clemson University and The University of Chicago | |
3 | * | |
4 | * See COPYING in top-level directory. | |
5 | */ | |
6 | ||
7 | #include "protocol.h" | |
575e9461 MM |
8 | #include "orangefs-kernel.h" |
9 | #include "orangefs-bufmap.h" | |
5db11c21 MM |
10 | |
11 | struct readdir_handle_s { | |
8bb8aefd | 12 | struct orangefs_readdir_response_s readdir_response; |
5db11c21 MM |
13 | void *dents_buf; |
14 | }; | |
15 | ||
16 | /* | |
1808f8cc MM |
17 | * decode routine used by kmod to deal with the blob sent from |
18 | * userspace for readdirs. The blob contains zero or more of these | |
19 | * sub-blobs: | |
20 | * __u32 - represents length of the character string that follows. | |
21 | * string - between 1 and ORANGEFS_NAME_MAX bytes long. | |
22 | * padding - (if needed) to cause the __u32 plus the string to be | |
23 | * eight byte aligned. | |
24 | * khandle - sizeof(khandle) bytes. | |
5db11c21 | 25 | */ |
8092895f | 26 | static long decode_dirents(char *ptr, size_t size, |
8bb8aefd | 27 | struct orangefs_readdir_response_s *readdir) |
5db11c21 MM |
28 | { |
29 | int i; | |
8bb8aefd YL |
30 | struct orangefs_readdir_response_s *rd = |
31 | (struct orangefs_readdir_response_s *) ptr; | |
5db11c21 | 32 | char *buf = ptr; |
1808f8cc MM |
33 | int khandle_size = sizeof(struct orangefs_khandle); |
34 | size_t offset = offsetof(struct orangefs_readdir_response_s, | |
35 | dirent_array); | |
36 | /* 8 reflects eight byte alignment */ | |
37 | int smallest_blob = khandle_size + 8; | |
38 | __u32 len; | |
39 | int aligned_len; | |
40 | int sizeof_u32 = sizeof(__u32); | |
41 | long ret; | |
5db11c21 | 42 | |
1808f8cc MM |
43 | gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size); |
44 | ||
45 | /* size is = offset on empty dirs, > offset on non-empty dirs... */ | |
46 | if (size < offset) { | |
47 | gossip_err("%s: size:%zu: offset:%zu:\n", | |
48 | __func__, | |
49 | size, | |
50 | offset); | |
51 | ret = -EINVAL; | |
52 | goto out; | |
53 | } | |
54 | ||
55 | if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) { | |
56 | gossip_err("%s: size:%zu: dirent_outcount:%d:\n", | |
57 | __func__, | |
58 | size, | |
59 | readdir->orangefs_dirent_outcount); | |
60 | ret = -EINVAL; | |
61 | goto out; | |
62 | } | |
8092895f | 63 | |
5db11c21 | 64 | readdir->token = rd->token; |
8bb8aefd YL |
65 | readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount; |
66 | readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount, | |
5db11c21 MM |
67 | sizeof(*readdir->dirent_array), |
68 | GFP_KERNEL); | |
1808f8cc MM |
69 | if (readdir->dirent_array == NULL) { |
70 | gossip_err("%s: kcalloc failed.\n", __func__); | |
71 | ret = -ENOMEM; | |
72 | goto out; | |
73 | } | |
8092895f | 74 | |
1808f8cc MM |
75 | buf += offset; |
76 | size -= offset; | |
8092895f | 77 | |
8bb8aefd | 78 | for (i = 0; i < readdir->orangefs_dirent_outcount; i++) { |
1808f8cc MM |
79 | if (size < smallest_blob) { |
80 | gossip_err("%s: size:%zu: smallest_blob:%d:\n", | |
81 | __func__, | |
82 | size, | |
83 | smallest_blob); | |
84 | ret = -EINVAL; | |
85 | goto free; | |
86 | } | |
8092895f AV |
87 | |
88 | len = *(__u32 *)buf; | |
1808f8cc MM |
89 | if ((len < 1) || (len > ORANGEFS_NAME_MAX)) { |
90 | gossip_err("%s: len:%d:\n", __func__, len); | |
91 | ret = -EINVAL; | |
92 | goto free; | |
93 | } | |
94 | ||
95 | gossip_debug(GOSSIP_DIR_DEBUG, | |
96 | "%s: size:%zu: len:%d:\n", | |
97 | __func__, | |
98 | size, | |
99 | len); | |
100 | ||
101 | readdir->dirent_array[i].d_name = buf + sizeof_u32; | |
9be68b08 | 102 | readdir->dirent_array[i].d_length = len; |
8092895f | 103 | |
7d221485 | 104 | /* |
1808f8cc MM |
105 | * Calculate "aligned" length of this string and its |
106 | * associated __u32 descriptor. | |
107 | */ | |
108 | aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7; | |
109 | gossip_debug(GOSSIP_DIR_DEBUG, | |
110 | "%s: aligned_len:%d:\n", | |
111 | __func__, | |
112 | aligned_len); | |
113 | ||
114 | /* | |
115 | * The end of the blob should coincide with the end | |
116 | * of the last sub-blob. | |
7d221485 | 117 | */ |
1808f8cc MM |
118 | if (size < aligned_len + khandle_size) { |
119 | gossip_err("%s: ran off the end of the blob.\n", | |
120 | __func__); | |
121 | ret = -EINVAL; | |
122 | goto free; | |
123 | } | |
124 | size -= aligned_len + khandle_size; | |
8092895f | 125 | |
1808f8cc | 126 | buf += aligned_len; |
8092895f | 127 | |
5db11c21 | 128 | readdir->dirent_array[i].khandle = |
8bb8aefd | 129 | *(struct orangefs_khandle *) buf; |
1808f8cc | 130 | buf += khandle_size; |
5db11c21 | 131 | } |
1808f8cc MM |
132 | ret = buf - ptr; |
133 | gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret); | |
134 | goto out; | |
135 | ||
136 | free: | |
8092895f AV |
137 | kfree(readdir->dirent_array); |
138 | readdir->dirent_array = NULL; | |
1808f8cc MM |
139 | |
140 | out: | |
141 | return ret; | |
5db11c21 MM |
142 | } |
143 | ||
144 | static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, | |
ee3b8d37 | 145 | size_t size) |
5db11c21 MM |
146 | { |
147 | long ret; | |
148 | ||
149 | if (buf == NULL) { | |
150 | gossip_err | |
151 | ("Invalid NULL buffer specified in readdir_handle_ctor\n"); | |
152 | return -ENOMEM; | |
153 | } | |
5db11c21 | 154 | rhandle->dents_buf = buf; |
8092895f | 155 | ret = decode_dirents(buf, size, &rhandle->readdir_response); |
5db11c21 MM |
156 | if (ret < 0) { |
157 | gossip_err("Could not decode readdir from buffer %ld\n", ret); | |
5db11c21 MM |
158 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); |
159 | vfree(buf); | |
160 | rhandle->dents_buf = NULL; | |
161 | } | |
162 | return ret; | |
163 | } | |
164 | ||
82d37f19 | 165 | static void readdir_handle_dtor(struct readdir_handle_s *rhandle) |
5db11c21 MM |
166 | { |
167 | if (rhandle == NULL) | |
168 | return; | |
169 | ||
170 | /* kfree(NULL) is safe */ | |
171 | kfree(rhandle->readdir_response.dirent_array); | |
172 | rhandle->readdir_response.dirent_array = NULL; | |
173 | ||
5db11c21 MM |
174 | if (rhandle->dents_buf) { |
175 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", | |
176 | rhandle->dents_buf); | |
177 | vfree(rhandle->dents_buf); | |
178 | rhandle->dents_buf = NULL; | |
179 | } | |
180 | } | |
181 | ||
182 | /* | |
183 | * Read directory entries from an instance of an open directory. | |
5db11c21 | 184 | */ |
8bb8aefd | 185 | static int orangefs_readdir(struct file *file, struct dir_context *ctx) |
5db11c21 | 186 | { |
8bb8aefd | 187 | struct orangefs_bufmap *bufmap = NULL; |
5db11c21 MM |
188 | int ret = 0; |
189 | int buffer_index; | |
88309aae MM |
190 | /* |
191 | * ptoken supports Orangefs' distributed directory logic, added | |
192 | * in 2.9.2. | |
193 | */ | |
5db11c21 MM |
194 | __u64 *ptoken = file->private_data; |
195 | __u64 pos = 0; | |
196 | ino_t ino = 0; | |
197 | struct dentry *dentry = file->f_path.dentry; | |
8bb8aefd YL |
198 | struct orangefs_kernel_op_s *new_op = NULL; |
199 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode); | |
5db11c21 MM |
200 | int buffer_full = 0; |
201 | struct readdir_handle_s rhandle; | |
202 | int i = 0; | |
203 | int len = 0; | |
204 | ino_t current_ino = 0; | |
205 | char *current_entry = NULL; | |
206 | long bytes_decoded; | |
207 | ||
88309aae MM |
208 | gossip_debug(GOSSIP_DIR_DEBUG, |
209 | "%s: ctx->pos:%lld, ptoken = %llu\n", | |
210 | __func__, | |
211 | lld(ctx->pos), | |
212 | llu(*ptoken)); | |
5db11c21 MM |
213 | |
214 | pos = (__u64) ctx->pos; | |
215 | ||
216 | /* are we done? */ | |
8bb8aefd | 217 | if (pos == ORANGEFS_READDIR_END) { |
5db11c21 MM |
218 | gossip_debug(GOSSIP_DIR_DEBUG, |
219 | "Skipping to termination path\n"); | |
220 | return 0; | |
221 | } | |
222 | ||
223 | gossip_debug(GOSSIP_DIR_DEBUG, | |
8bb8aefd | 224 | "orangefs_readdir called on %s (pos=%llu)\n", |
5db11c21 MM |
225 | dentry->d_name.name, llu(pos)); |
226 | ||
5db11c21 MM |
227 | rhandle.dents_buf = NULL; |
228 | memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); | |
229 | ||
8bb8aefd | 230 | new_op = op_alloc(ORANGEFS_VFS_OP_READDIR); |
5db11c21 MM |
231 | if (!new_op) |
232 | return -ENOMEM; | |
233 | ||
ee3b8d37 MB |
234 | /* |
235 | * Only the indices are shared. No memory is actually shared, but the | |
236 | * mechanism is used. | |
237 | */ | |
5db11c21 | 238 | new_op->uses_shared_memory = 1; |
8bb8aefd | 239 | new_op->upcall.req.readdir.refn = orangefs_inode->refn; |
7d221485 MB |
240 | new_op->upcall.req.readdir.max_dirent_count = |
241 | ORANGEFS_MAX_DIRENT_COUNT_READDIR; | |
5db11c21 MM |
242 | |
243 | gossip_debug(GOSSIP_DIR_DEBUG, | |
244 | "%s: upcall.req.readdir.refn.khandle: %pU\n", | |
245 | __func__, | |
246 | &new_op->upcall.req.readdir.refn.khandle); | |
247 | ||
5db11c21 MM |
248 | new_op->upcall.req.readdir.token = *ptoken; |
249 | ||
250 | get_new_buffer_index: | |
7d221485 | 251 | ret = orangefs_readdir_index_get(&bufmap, &buffer_index); |
5db11c21 | 252 | if (ret < 0) { |
7d221485 | 253 | gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n", |
5db11c21 MM |
254 | ret); |
255 | goto out_free_op; | |
256 | } | |
257 | new_op->upcall.req.readdir.buf_index = buffer_index; | |
258 | ||
259 | ret = service_operation(new_op, | |
8bb8aefd | 260 | "orangefs_readdir", |
5db11c21 MM |
261 | get_interruptible_flag(dentry->d_inode)); |
262 | ||
263 | gossip_debug(GOSSIP_DIR_DEBUG, | |
264 | "Readdir downcall status is %d. ret:%d\n", | |
265 | new_op->downcall.status, | |
266 | ret); | |
267 | ||
ee3b8d37 MB |
268 | orangefs_readdir_index_put(buffer_index); |
269 | ||
5db11c21 | 270 | if (ret == -EAGAIN && op_state_purged(new_op)) { |
ee3b8d37 | 271 | /* Client-core indices are invalid after it restarted. */ |
5db11c21 MM |
272 | gossip_debug(GOSSIP_DIR_DEBUG, |
273 | "%s: Getting new buffer_index for retry of readdir..\n", | |
274 | __func__); | |
5db11c21 MM |
275 | goto get_new_buffer_index; |
276 | } | |
277 | ||
278 | if (ret == -EIO && op_state_purged(new_op)) { | |
279 | gossip_err("%s: Client is down. Aborting readdir call.\n", | |
280 | __func__); | |
5db11c21 MM |
281 | goto out_free_op; |
282 | } | |
283 | ||
284 | if (ret < 0 || new_op->downcall.status != 0) { | |
285 | gossip_debug(GOSSIP_DIR_DEBUG, | |
286 | "Readdir request failed. Status:%d\n", | |
287 | new_op->downcall.status); | |
5db11c21 MM |
288 | if (ret >= 0) |
289 | ret = new_op->downcall.status; | |
290 | goto out_free_op; | |
291 | } | |
292 | ||
293 | bytes_decoded = | |
294 | readdir_handle_ctor(&rhandle, | |
295 | new_op->downcall.trailer_buf, | |
ee3b8d37 | 296 | new_op->downcall.trailer_size); |
5db11c21 | 297 | if (bytes_decoded < 0) { |
8bb8aefd | 298 | gossip_err("orangefs_readdir: Could not decode trailer buffer into a readdir response %d\n", |
5db11c21 MM |
299 | ret); |
300 | ret = bytes_decoded; | |
5db11c21 MM |
301 | goto out_free_op; |
302 | } | |
303 | ||
304 | if (bytes_decoded != new_op->downcall.trailer_size) { | |
8bb8aefd | 305 | gossip_err("orangefs_readdir: # bytes decoded (%ld) " |
88309aae MM |
306 | "!= trailer size (%ld)\n", |
307 | bytes_decoded, | |
308 | (long)new_op->downcall.trailer_size); | |
5db11c21 MM |
309 | ret = -EINVAL; |
310 | goto out_destroy_handle; | |
311 | } | |
312 | ||
88309aae | 313 | /* |
8bb8aefd | 314 | * orangefs doesn't actually store dot and dot-dot, but |
88309aae MM |
315 | * we need to have them represented. |
316 | */ | |
5db11c21 MM |
317 | if (pos == 0) { |
318 | ino = get_ino_from_khandle(dentry->d_inode); | |
319 | gossip_debug(GOSSIP_DIR_DEBUG, | |
320 | "%s: calling dir_emit of \".\" with pos = %llu\n", | |
321 | __func__, | |
322 | llu(pos)); | |
323 | ret = dir_emit(ctx, ".", 1, ino, DT_DIR); | |
88309aae | 324 | pos += 1; |
5db11c21 MM |
325 | } |
326 | ||
327 | if (pos == 1) { | |
328 | ino = get_parent_ino_from_dentry(dentry); | |
329 | gossip_debug(GOSSIP_DIR_DEBUG, | |
330 | "%s: calling dir_emit of \"..\" with pos = %llu\n", | |
331 | __func__, | |
332 | llu(pos)); | |
333 | ret = dir_emit(ctx, "..", 2, ino, DT_DIR); | |
88309aae | 334 | pos += 1; |
5db11c21 MM |
335 | } |
336 | ||
88309aae | 337 | /* |
8bb8aefd | 338 | * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around |
88309aae MM |
339 | * to prevent "finding" dot and dot-dot on any iteration |
340 | * other than the first. | |
341 | */ | |
8bb8aefd | 342 | if (ctx->pos == ORANGEFS_ITERATE_NEXT) |
88309aae MM |
343 | ctx->pos = 0; |
344 | ||
cf07c0bf MM |
345 | gossip_debug(GOSSIP_DIR_DEBUG, |
346 | "%s: dirent_outcount:%d:\n", | |
347 | __func__, | |
348 | rhandle.readdir_response.orangefs_dirent_outcount); | |
88309aae | 349 | for (i = ctx->pos; |
8bb8aefd | 350 | i < rhandle.readdir_response.orangefs_dirent_outcount; |
88309aae | 351 | i++) { |
5db11c21 MM |
352 | len = rhandle.readdir_response.dirent_array[i].d_length; |
353 | current_entry = rhandle.readdir_response.dirent_array[i].d_name; | |
8bb8aefd | 354 | current_ino = orangefs_khandle_to_ino( |
5db11c21 MM |
355 | &(rhandle.readdir_response.dirent_array[i].khandle)); |
356 | ||
357 | gossip_debug(GOSSIP_DIR_DEBUG, | |
88309aae MM |
358 | "calling dir_emit for %s with len %d" |
359 | ", ctx->pos %ld\n", | |
5db11c21 MM |
360 | current_entry, |
361 | len, | |
88309aae MM |
362 | (unsigned long)ctx->pos); |
363 | /* | |
364 | * type is unknown. We don't return object type | |
365 | * in the dirent_array. This leaves getdents | |
366 | * clueless about type. | |
367 | */ | |
5db11c21 MM |
368 | ret = |
369 | dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); | |
88309aae MM |
370 | if (!ret) |
371 | break; | |
5db11c21 | 372 | ctx->pos++; |
88309aae | 373 | gossip_debug(GOSSIP_DIR_DEBUG, |
5db11c21 MM |
374 | "%s: ctx->pos:%lld\n", |
375 | __func__, | |
376 | lld(ctx->pos)); | |
377 | ||
5db11c21 MM |
378 | } |
379 | ||
54804949 | 380 | /* |
88309aae MM |
381 | * we ran all the way through the last batch, set up for |
382 | * getting another batch... | |
383 | */ | |
384 | if (ret) { | |
5db11c21 | 385 | *ptoken = rhandle.readdir_response.token; |
8bb8aefd | 386 | ctx->pos = ORANGEFS_ITERATE_NEXT; |
5db11c21 MM |
387 | } |
388 | ||
389 | /* | |
390 | * Did we hit the end of the directory? | |
391 | */ | |
8bb8aefd | 392 | if (rhandle.readdir_response.token == ORANGEFS_READDIR_END && |
5db11c21 | 393 | !buffer_full) { |
88309aae | 394 | gossip_debug(GOSSIP_DIR_DEBUG, |
8bb8aefd YL |
395 | "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n"); |
396 | ctx->pos = ORANGEFS_READDIR_END; | |
5db11c21 MM |
397 | } |
398 | ||
5db11c21 | 399 | out_destroy_handle: |
82d37f19 | 400 | readdir_handle_dtor(&rhandle); |
5db11c21 MM |
401 | out_free_op: |
402 | op_release(new_op); | |
8bb8aefd | 403 | gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret); |
5db11c21 MM |
404 | return ret; |
405 | } | |
406 | ||
8bb8aefd | 407 | static int orangefs_dir_open(struct inode *inode, struct file *file) |
5db11c21 MM |
408 | { |
409 | __u64 *ptoken; | |
410 | ||
411 | file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); | |
412 | if (!file->private_data) | |
413 | return -ENOMEM; | |
414 | ||
415 | ptoken = file->private_data; | |
8bb8aefd | 416 | *ptoken = ORANGEFS_READDIR_START; |
5db11c21 MM |
417 | return 0; |
418 | } | |
419 | ||
8bb8aefd | 420 | static int orangefs_dir_release(struct inode *inode, struct file *file) |
5db11c21 | 421 | { |
8bb8aefd | 422 | orangefs_flush_inode(inode); |
5db11c21 MM |
423 | kfree(file->private_data); |
424 | return 0; | |
425 | } | |
426 | ||
8bb8aefd YL |
427 | /** ORANGEFS implementation of VFS directory operations */ |
428 | const struct file_operations orangefs_dir_operations = { | |
5db11c21 | 429 | .read = generic_read_dir, |
8bb8aefd YL |
430 | .iterate = orangefs_readdir, |
431 | .open = orangefs_dir_open, | |
432 | .release = orangefs_dir_release, | |
5db11c21 | 433 | }; |