| 1 | /* |
| 2 | * (C) 2001 Clemson University and The University of Chicago |
| 3 | * |
| 4 | * See COPYING in top-level directory. |
| 5 | */ |
| 6 | |
| 7 | #include "protocol.h" |
| 8 | #include "pvfs2-kernel.h" |
| 9 | #include "pvfs2-bufmap.h" |
| 10 | |
| 11 | struct readdir_handle_s { |
| 12 | int buffer_index; |
| 13 | struct pvfs2_readdir_response_s readdir_response; |
| 14 | void *dents_buf; |
| 15 | }; |
| 16 | |
| 17 | /* |
| 18 | * decode routine needed by kmod to make sense of the shared page for readdirs. |
| 19 | */ |
| 20 | static long decode_dirents(char *ptr, struct pvfs2_readdir_response_s *readdir) |
| 21 | { |
| 22 | int i; |
| 23 | struct pvfs2_readdir_response_s *rd = |
| 24 | (struct pvfs2_readdir_response_s *) ptr; |
| 25 | char *buf = ptr; |
| 26 | char **pptr = &buf; |
| 27 | |
| 28 | readdir->token = rd->token; |
| 29 | readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; |
| 30 | readdir->dirent_array = kmalloc(readdir->pvfs_dirent_outcount * |
| 31 | sizeof(*readdir->dirent_array), |
| 32 | GFP_KERNEL); |
| 33 | if (readdir->dirent_array == NULL) |
| 34 | return -ENOMEM; |
| 35 | *pptr += offsetof(struct pvfs2_readdir_response_s, dirent_array); |
| 36 | for (i = 0; i < readdir->pvfs_dirent_outcount; i++) { |
| 37 | dec_string(pptr, &readdir->dirent_array[i].d_name, |
| 38 | &readdir->dirent_array[i].d_length); |
| 39 | readdir->dirent_array[i].khandle = |
| 40 | *(struct pvfs2_khandle *) *pptr; |
| 41 | *pptr += 16; |
| 42 | } |
| 43 | return (unsigned long)*pptr - (unsigned long)ptr; |
| 44 | } |
| 45 | |
| 46 | static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, |
| 47 | int buffer_index) |
| 48 | { |
| 49 | long ret; |
| 50 | |
| 51 | if (buf == NULL) { |
| 52 | gossip_err |
| 53 | ("Invalid NULL buffer specified in readdir_handle_ctor\n"); |
| 54 | return -ENOMEM; |
| 55 | } |
| 56 | if (buffer_index < 0) { |
| 57 | gossip_err |
| 58 | ("Invalid buffer index specified in readdir_handle_ctor\n"); |
| 59 | return -EINVAL; |
| 60 | } |
| 61 | rhandle->buffer_index = buffer_index; |
| 62 | rhandle->dents_buf = buf; |
| 63 | ret = decode_dirents(buf, &rhandle->readdir_response); |
| 64 | if (ret < 0) { |
| 65 | gossip_err("Could not decode readdir from buffer %ld\n", ret); |
| 66 | rhandle->buffer_index = -1; |
| 67 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); |
| 68 | vfree(buf); |
| 69 | rhandle->dents_buf = NULL; |
| 70 | } |
| 71 | return ret; |
| 72 | } |
| 73 | |
| 74 | static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, |
| 75 | struct readdir_handle_s *rhandle) |
| 76 | { |
| 77 | if (rhandle == NULL) |
| 78 | return; |
| 79 | |
| 80 | /* kfree(NULL) is safe */ |
| 81 | kfree(rhandle->readdir_response.dirent_array); |
| 82 | rhandle->readdir_response.dirent_array = NULL; |
| 83 | |
| 84 | if (rhandle->buffer_index >= 0) { |
| 85 | readdir_index_put(bufmap, rhandle->buffer_index); |
| 86 | rhandle->buffer_index = -1; |
| 87 | } |
| 88 | if (rhandle->dents_buf) { |
| 89 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", |
| 90 | rhandle->dents_buf); |
| 91 | vfree(rhandle->dents_buf); |
| 92 | rhandle->dents_buf = NULL; |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | /* |
| 97 | * Read directory entries from an instance of an open directory. |
| 98 | * |
| 99 | * \note This routine was converted for the readdir to iterate change |
| 100 | * in "struct file_operations". "converted" mostly amounts to |
| 101 | * changing occurrences of "readdir" and "filldir" in the |
| 102 | * comments to "iterate" and "dir_emit". Also filldir calls |
| 103 | * were changed to dir_emit calls. |
| 104 | * |
| 105 | * \param dir_emit callback function called for each entry read. |
| 106 | * |
| 107 | * \retval <0 on error |
| 108 | * \retval 0 when directory has been completely traversed |
| 109 | * \retval >0 if we don't call dir_emit for all entries |
| 110 | * |
| 111 | * \note If the dir_emit call-back returns non-zero, then iterate should |
| 112 | * assume that it has had enough, and should return as well. |
| 113 | */ |
| 114 | static int pvfs2_readdir(struct file *file, struct dir_context *ctx) |
| 115 | { |
| 116 | struct pvfs2_bufmap *bufmap = NULL; |
| 117 | int ret = 0; |
| 118 | int buffer_index; |
| 119 | __u64 *ptoken = file->private_data; |
| 120 | __u64 pos = 0; |
| 121 | ino_t ino = 0; |
| 122 | struct dentry *dentry = file->f_path.dentry; |
| 123 | struct pvfs2_kernel_op_s *new_op = NULL; |
| 124 | struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode); |
| 125 | int buffer_full = 0; |
| 126 | struct readdir_handle_s rhandle; |
| 127 | int i = 0; |
| 128 | int len = 0; |
| 129 | ino_t current_ino = 0; |
| 130 | char *current_entry = NULL; |
| 131 | long bytes_decoded; |
| 132 | |
| 133 | gossip_ldebug(GOSSIP_DIR_DEBUG, |
| 134 | "%s: ctx->pos:%lld, token = %llu\n", |
| 135 | __func__, |
| 136 | lld(ctx->pos), |
| 137 | llu(*ptoken)); |
| 138 | |
| 139 | pos = (__u64) ctx->pos; |
| 140 | |
| 141 | /* are we done? */ |
| 142 | if (pos == PVFS_READDIR_END) { |
| 143 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 144 | "Skipping to termination path\n"); |
| 145 | return 0; |
| 146 | } |
| 147 | |
| 148 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 149 | "pvfs2_readdir called on %s (pos=%llu)\n", |
| 150 | dentry->d_name.name, llu(pos)); |
| 151 | |
| 152 | rhandle.buffer_index = -1; |
| 153 | rhandle.dents_buf = NULL; |
| 154 | memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); |
| 155 | |
| 156 | new_op = op_alloc(PVFS2_VFS_OP_READDIR); |
| 157 | if (!new_op) |
| 158 | return -ENOMEM; |
| 159 | |
| 160 | new_op->uses_shared_memory = 1; |
| 161 | new_op->upcall.req.readdir.refn = pvfs2_inode->refn; |
| 162 | new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; |
| 163 | |
| 164 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 165 | "%s: upcall.req.readdir.refn.khandle: %pU\n", |
| 166 | __func__, |
| 167 | &new_op->upcall.req.readdir.refn.khandle); |
| 168 | |
| 169 | /* |
| 170 | * NOTE: the position we send to the readdir upcall is out of |
| 171 | * sync with ctx->pos since: |
| 172 | * 1. pvfs2 doesn't include the "." and ".." entries that are |
| 173 | * added below. |
| 174 | * 2. the introduction of distributed directory logic makes token no |
| 175 | * longer be related to f_pos and pos. Instead an independent |
| 176 | * variable is used inside the function and stored in the |
| 177 | * private_data of the file structure. |
| 178 | */ |
| 179 | new_op->upcall.req.readdir.token = *ptoken; |
| 180 | |
| 181 | get_new_buffer_index: |
| 182 | ret = readdir_index_get(&bufmap, &buffer_index); |
| 183 | if (ret < 0) { |
| 184 | gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n", |
| 185 | ret); |
| 186 | goto out_free_op; |
| 187 | } |
| 188 | new_op->upcall.req.readdir.buf_index = buffer_index; |
| 189 | |
| 190 | ret = service_operation(new_op, |
| 191 | "pvfs2_readdir", |
| 192 | get_interruptible_flag(dentry->d_inode)); |
| 193 | |
| 194 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 195 | "Readdir downcall status is %d. ret:%d\n", |
| 196 | new_op->downcall.status, |
| 197 | ret); |
| 198 | |
| 199 | if (ret == -EAGAIN && op_state_purged(new_op)) { |
| 200 | /* |
| 201 | * readdir shared memory aread has been wiped due to |
| 202 | * pvfs2-client-core restarting, so we must get a new |
| 203 | * index into the shared memory. |
| 204 | */ |
| 205 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 206 | "%s: Getting new buffer_index for retry of readdir..\n", |
| 207 | __func__); |
| 208 | readdir_index_put(bufmap, buffer_index); |
| 209 | goto get_new_buffer_index; |
| 210 | } |
| 211 | |
| 212 | if (ret == -EIO && op_state_purged(new_op)) { |
| 213 | gossip_err("%s: Client is down. Aborting readdir call.\n", |
| 214 | __func__); |
| 215 | readdir_index_put(bufmap, buffer_index); |
| 216 | goto out_free_op; |
| 217 | } |
| 218 | |
| 219 | if (ret < 0 || new_op->downcall.status != 0) { |
| 220 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 221 | "Readdir request failed. Status:%d\n", |
| 222 | new_op->downcall.status); |
| 223 | readdir_index_put(bufmap, buffer_index); |
| 224 | if (ret >= 0) |
| 225 | ret = new_op->downcall.status; |
| 226 | goto out_free_op; |
| 227 | } |
| 228 | |
| 229 | bytes_decoded = |
| 230 | readdir_handle_ctor(&rhandle, |
| 231 | new_op->downcall.trailer_buf, |
| 232 | buffer_index); |
| 233 | if (bytes_decoded < 0) { |
| 234 | gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n", |
| 235 | ret); |
| 236 | ret = bytes_decoded; |
| 237 | readdir_index_put(bufmap, buffer_index); |
| 238 | goto out_free_op; |
| 239 | } |
| 240 | |
| 241 | if (bytes_decoded != new_op->downcall.trailer_size) { |
| 242 | gossip_err("pvfs2_readdir: # bytes decoded (%ld) != trailer size (%ld)\n", |
| 243 | bytes_decoded, |
| 244 | (long)new_op->downcall.trailer_size); |
| 245 | ret = -EINVAL; |
| 246 | goto out_destroy_handle; |
| 247 | } |
| 248 | |
| 249 | if (pos == 0) { |
| 250 | ino = get_ino_from_khandle(dentry->d_inode); |
| 251 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 252 | "%s: calling dir_emit of \".\" with pos = %llu\n", |
| 253 | __func__, |
| 254 | llu(pos)); |
| 255 | ret = dir_emit(ctx, ".", 1, ino, DT_DIR); |
| 256 | if (ret < 0) |
| 257 | goto out_destroy_handle; |
| 258 | ctx->pos++; |
| 259 | gossip_ldebug(GOSSIP_DIR_DEBUG, |
| 260 | "%s: ctx->pos:%lld\n", |
| 261 | __func__, |
| 262 | lld(ctx->pos)); |
| 263 | pos++; |
| 264 | } |
| 265 | |
| 266 | if (pos == 1) { |
| 267 | ino = get_parent_ino_from_dentry(dentry); |
| 268 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 269 | "%s: calling dir_emit of \"..\" with pos = %llu\n", |
| 270 | __func__, |
| 271 | llu(pos)); |
| 272 | ret = dir_emit(ctx, "..", 2, ino, DT_DIR); |
| 273 | if (ret < 0) |
| 274 | goto out_destroy_handle; |
| 275 | ctx->pos++; |
| 276 | gossip_ldebug(GOSSIP_DIR_DEBUG, |
| 277 | "%s: ctx->pos:%lld\n", |
| 278 | __func__, |
| 279 | lld(ctx->pos)); |
| 280 | pos++; |
| 281 | } |
| 282 | |
| 283 | for (i = 0; i < rhandle.readdir_response.pvfs_dirent_outcount; i++) { |
| 284 | len = rhandle.readdir_response.dirent_array[i].d_length; |
| 285 | current_entry = rhandle.readdir_response.dirent_array[i].d_name; |
| 286 | current_ino = pvfs2_khandle_to_ino( |
| 287 | &(rhandle.readdir_response.dirent_array[i].khandle)); |
| 288 | |
| 289 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 290 | "calling dir_emit for %s with len %d, pos %ld\n", |
| 291 | current_entry, |
| 292 | len, |
| 293 | (unsigned long)pos); |
| 294 | ret = |
| 295 | dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); |
| 296 | if (ret < 0) { |
| 297 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 298 | "dir_emit() failed. ret:%d\n", |
| 299 | ret); |
| 300 | if (i < 2) { |
| 301 | gossip_err("dir_emit failed on one of the first two true PVFS directory entries.\n"); |
| 302 | gossip_err("Duplicate entries may appear.\n"); |
| 303 | } |
| 304 | buffer_full = 1; |
| 305 | break; |
| 306 | } |
| 307 | ctx->pos++; |
| 308 | gossip_ldebug(GOSSIP_DIR_DEBUG, |
| 309 | "%s: ctx->pos:%lld\n", |
| 310 | __func__, |
| 311 | lld(ctx->pos)); |
| 312 | |
| 313 | pos++; |
| 314 | } |
| 315 | |
| 316 | /* this means that all of the dir_emit calls succeeded */ |
| 317 | if (i == rhandle.readdir_response.pvfs_dirent_outcount) { |
| 318 | /* update token */ |
| 319 | *ptoken = rhandle.readdir_response.token; |
| 320 | } else { |
| 321 | /* this means a dir_emit call failed */ |
| 322 | if (rhandle.readdir_response.token == PVFS_READDIR_END) { |
| 323 | /* |
| 324 | * If PVFS hit end of directory, then there |
| 325 | * is no way to do math on the token that it |
| 326 | * returned. Instead we go by ctx->pos but |
| 327 | * back up to account for the artificial . |
| 328 | * and .. entries. |
| 329 | */ |
| 330 | ctx->pos -= 3; |
| 331 | } else { |
| 332 | /* |
| 333 | * this means a dir_emit call failed. !!! need to set |
| 334 | * back to previous ctx->pos, no middle value allowed |
| 335 | */ |
| 336 | pos -= (i - 1); |
| 337 | ctx->pos -= (i - 1); |
| 338 | } |
| 339 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 340 | "at least one dir_emit call failed. Setting ctx->pos to: %lld\n", |
| 341 | lld(ctx->pos)); |
| 342 | } |
| 343 | |
| 344 | /* |
| 345 | * Did we hit the end of the directory? |
| 346 | */ |
| 347 | if (rhandle.readdir_response.token == PVFS_READDIR_END && |
| 348 | !buffer_full) { |
| 349 | gossip_debug(GOSSIP_DIR_DEBUG, "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); |
| 350 | ctx->pos = PVFS_READDIR_END; |
| 351 | } |
| 352 | |
| 353 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 354 | "pos = %llu, token = %llu" |
| 355 | ", ctx->pos should have been %lld\n", |
| 356 | llu(pos), |
| 357 | llu(*ptoken), |
| 358 | lld(ctx->pos)); |
| 359 | |
| 360 | out_destroy_handle: |
| 361 | readdir_handle_dtor(bufmap, &rhandle); |
| 362 | out_free_op: |
| 363 | op_release(new_op); |
| 364 | gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret); |
| 365 | return ret; |
| 366 | } |
| 367 | |
| 368 | static int pvfs2_dir_open(struct inode *inode, struct file *file) |
| 369 | { |
| 370 | __u64 *ptoken; |
| 371 | |
| 372 | file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); |
| 373 | if (!file->private_data) |
| 374 | return -ENOMEM; |
| 375 | |
| 376 | ptoken = file->private_data; |
| 377 | *ptoken = PVFS_READDIR_START; |
| 378 | return 0; |
| 379 | } |
| 380 | |
| 381 | static int pvfs2_dir_release(struct inode *inode, struct file *file) |
| 382 | { |
| 383 | pvfs2_flush_inode(inode); |
| 384 | kfree(file->private_data); |
| 385 | return 0; |
| 386 | } |
| 387 | |
| 388 | /** PVFS2 implementation of VFS directory operations */ |
| 389 | const struct file_operations pvfs2_dir_operations = { |
| 390 | .read = generic_read_dir, |
| 391 | .iterate = pvfs2_readdir, |
| 392 | .open = pvfs2_dir_open, |
| 393 | .release = pvfs2_dir_release, |
| 394 | }; |