Commit | Line | Data |
---|---|---|
5db11c21 | 1 | /* |
382f4581 | 2 | * Copyright 2017 Omnibond Systems, L.L.C. |
5db11c21 MM |
3 | */ |
4 | ||
5 | #include "protocol.h" | |
575e9461 MM |
6 | #include "orangefs-kernel.h" |
7 | #include "orangefs-bufmap.h" | |
5db11c21 | 8 | |
480e3e53 MB |
/*
 * One chunk of directory data received from the userspace daemon.
 *
 * This header is written over the start of the trailer buffer itself
 * (see parse_readdir), so a part and the data it describes share one
 * allocation. Entry data is read starting
 * sizeof(struct orangefs_readdir_response_s) bytes from the start of
 * the part (see fill_from_part).
 */
struct orangefs_dir_part {
	/* Next part in the list, or NULL for the most recent part. */
	struct orangefs_dir_part *next;
	/* Bytes of entry data: trailer size minus the response header. */
	size_t len;
};
13 | ||
14 | struct orangefs_dir { | |
15 | __u64 token; | |
16 | struct orangefs_dir_part *part; | |
17 | loff_t end; | |
18 | int error; | |
19 | }; | |
20 | ||
/*
 * A directory position (ctx->pos) is split at PART_SHIFT: the bits at
 * and above it select the part, the bits below it are the byte offset
 * inside that part. PART_SIZE and PART_MASK are derived from PART_SHIFT
 * so the three can never disagree.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
24 | ||
5db11c21 | 25 | /* |
382f4581 MB |
26 | * There can be up to 512 directory entries. Each entry is encoded as |
27 | * follows: | |
28 | * 4 bytes: string size (n) | |
29 | * n bytes: string | |
30 | * 1 byte: trailing zero | |
31 | * padding to 8 bytes | |
32 | * 16 bytes: khandle | |
33 | * padding to 8 bytes | |
382f4581 MB |
34 | * |
35 | * The trailer_buf starts with a struct orangefs_readdir_response_s | |
36 | * which must be skipped to get to the directory data. | |
480e3e53 MB |
37 | * |
38 | * The data which is received from the userspace daemon is termed a | |
39 | * part and is stored in a linked list in case more than one part is | |
40 | * needed for a large directory. | |
41 | * | |
42 | * The position pointer (ctx->pos) encodes the part and offset at which | |
43 | * to begin reading. Bits above PART_SHIFT encode the part and bits | |
44 | * below PART_SHIFT encode the offset. Parts are stored in a linked | |
45 | * list which grows as data is received from the server. The overhead | |
46 | * associated with managing the list is presumed to be small compared to | |
47 | * the overhead of communicating with the server. | |
48 | * | |
49 | * As data is received from the server, it is placed at the end of the | |
50 | * part list. Data is parsed from the current position as it is needed. | |
51 | * When data is determined to be corrupt, it is either because the | |
52 | * userspace component has sent back corrupt data or because the file | |
53 | * pointer has been moved to an invalid location. Since the two cannot | |
54 | * be differentiated, return EIO. | |
55 | * | |
56 | * Part zero is synthesized to contain `.' and `..'. Part one is the | |
57 | * first part of the part list. | |
5db11c21 | 58 | */ |
5db11c21 | 59 | |
480e3e53 MB |
60 | static int do_readdir(struct orangefs_inode_s *oi, |
61 | struct orangefs_dir *od, struct dentry *dentry, | |
62 | struct orangefs_kernel_op_s *op) | |
382f4581 | 63 | { |
382f4581 | 64 | struct orangefs_readdir_response_s *resp; |
382f4581 MB |
65 | int bufi, r; |
66 | ||
ee3b8d37 | 67 | /* |
382f4581 MB |
68 | * Despite the badly named field, readdir does not use shared |
69 | * memory. However, there are a limited number of readdir | |
70 | * slots, which must be allocated here. This flag simply tells | |
71 | * the op scheduler to return the op here for retry. | |
ee3b8d37 | 72 | */ |
382f4581 MB |
73 | op->uses_shared_memory = 1; |
74 | op->upcall.req.readdir.refn = oi->refn; | |
75 | op->upcall.req.readdir.token = od->token; | |
76 | op->upcall.req.readdir.max_dirent_count = | |
7d221485 | 77 | ORANGEFS_MAX_DIRENT_COUNT_READDIR; |
5db11c21 | 78 | |
382f4581 MB |
79 | again: |
80 | bufi = orangefs_readdir_index_get(); | |
81 | if (bufi < 0) { | |
382f4581 MB |
82 | od->error = bufi; |
83 | return bufi; | |
5db11c21 | 84 | } |
5db11c21 | 85 | |
382f4581 | 86 | op->upcall.req.readdir.buf_index = bufi; |
5db11c21 | 87 | |
382f4581 MB |
88 | r = service_operation(op, "orangefs_readdir", |
89 | get_interruptible_flag(dentry->d_inode)); | |
5db11c21 | 90 | |
382f4581 | 91 | orangefs_readdir_index_put(bufi); |
ee3b8d37 | 92 | |
382f4581 MB |
93 | if (op_state_purged(op)) { |
94 | if (r == -EAGAIN) { | |
95 | vfree(op->downcall.trailer_buf); | |
96 | goto again; | |
97 | } else if (r == -EIO) { | |
98 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
99 | od->error = r; |
100 | return r; | |
101 | } | |
5db11c21 MM |
102 | } |
103 | ||
382f4581 MB |
104 | if (r < 0) { |
105 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
106 | od->error = r; |
107 | return r; | |
108 | } else if (op->downcall.status) { | |
109 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
110 | od->error = op->downcall.status; |
111 | return op->downcall.status; | |
112 | } | |
113 | ||
480e3e53 MB |
114 | /* |
115 | * The maximum size is size per entry times the 512 entries plus | |
116 | * the header. This is well under the limit. | |
117 | */ | |
118 | if (op->downcall.trailer_size > PART_SIZE) { | |
119 | vfree(op->downcall.trailer_buf); | |
120 | od->error = -EIO; | |
121 | return -EIO; | |
122 | } | |
123 | ||
382f4581 MB |
124 | resp = (struct orangefs_readdir_response_s *) |
125 | op->downcall.trailer_buf; | |
126 | od->token = resp->token; | |
480e3e53 MB |
127 | return 0; |
128 | } | |
382f4581 | 129 | |
480e3e53 MB |
130 | static int parse_readdir(struct orangefs_dir *od, |
131 | struct orangefs_kernel_op_s *op) | |
132 | { | |
133 | struct orangefs_dir_part *part, *new; | |
134 | size_t count; | |
135 | ||
136 | count = 1; | |
137 | part = od->part; | |
2f713b5c | 138 | while (part) { |
480e3e53 | 139 | count++; |
2f713b5c MB |
140 | if (part->next) |
141 | part = part->next; | |
142 | else | |
143 | break; | |
382f4581 MB |
144 | } |
145 | ||
480e3e53 MB |
146 | new = (void *)op->downcall.trailer_buf; |
147 | new->next = NULL; | |
148 | new->len = op->downcall.trailer_size - | |
149 | sizeof(struct orangefs_readdir_response_s); | |
150 | if (!od->part) | |
151 | od->part = new; | |
152 | else | |
153 | part->next = new; | |
154 | count++; | |
155 | od->end = count << PART_SHIFT; | |
156 | ||
382f4581 MB |
157 | return 0; |
158 | } | |
9f5e2f7f | 159 | |
480e3e53 MB |
160 | static int orangefs_dir_more(struct orangefs_inode_s *oi, |
161 | struct orangefs_dir *od, struct dentry *dentry) | |
162 | { | |
163 | struct orangefs_kernel_op_s *op; | |
164 | int r; | |
165 | ||
166 | op = op_alloc(ORANGEFS_VFS_OP_READDIR); | |
167 | if (!op) { | |
168 | od->error = -ENOMEM; | |
169 | return -ENOMEM; | |
170 | } | |
171 | r = do_readdir(oi, od, dentry, op); | |
172 | if (r) { | |
173 | od->error = r; | |
174 | goto out; | |
175 | } | |
176 | r = parse_readdir(od, op); | |
177 | if (r) { | |
178 | od->error = r; | |
179 | goto out; | |
180 | } | |
181 | ||
182 | od->error = 0; | |
183 | out: | |
184 | op_release(op); | |
185 | return od->error; | |
186 | } | |
187 | ||
188 | static int fill_from_part(struct orangefs_dir_part *part, | |
382f4581 MB |
189 | struct dir_context *ctx) |
190 | { | |
480e3e53 | 191 | const int offset = sizeof(struct orangefs_readdir_response_s); |
382f4581 MB |
192 | struct orangefs_khandle *khandle; |
193 | __u32 *len, padlen; | |
72f66b83 | 194 | loff_t i; |
382f4581 | 195 | char *s; |
480e3e53 MB |
196 | i = ctx->pos & ~PART_MASK; |
197 | ||
198 | /* The file offset from userspace is too large. */ | |
199 | if (i > part->len) | |
bf15ba7c MB |
200 | return 1; |
201 | ||
202 | /* | |
203 | * If the seek pointer is positioned just before an entry it | |
204 | * should find the next entry. | |
205 | */ | |
206 | if (i % 8) | |
207 | i = i + (8 - i%8)%8; | |
480e3e53 MB |
208 | |
209 | while (i < part->len) { | |
210 | if (part->len < i + sizeof *len) | |
bf15ba7c | 211 | break; |
480e3e53 | 212 | len = (void *)part + offset + i; |
382f4581 MB |
213 | /* |
214 | * len is the size of the string itself. padlen is the | |
215 | * total size of the encoded string. | |
216 | */ | |
217 | padlen = (sizeof *len + *len + 1) + | |
480e3e53 MB |
218 | (8 - (sizeof *len + *len + 1)%8)%8; |
219 | if (part->len < i + padlen + sizeof *khandle) | |
bf15ba7c | 220 | goto next; |
480e3e53 | 221 | s = (void *)part + offset + i + sizeof *len; |
382f4581 | 222 | if (s[*len] != 0) |
bf15ba7c | 223 | goto next; |
480e3e53 | 224 | khandle = (void *)part + offset + i + padlen; |
382f4581 | 225 | if (!dir_emit(ctx, s, *len, |
480e3e53 MB |
226 | orangefs_khandle_to_ino(khandle), |
227 | DT_UNKNOWN)) | |
382f4581 | 228 | return 0; |
72f66b83 MB |
229 | i += padlen + sizeof *khandle; |
230 | i = i + (8 - i%8)%8; | |
480e3e53 MB |
231 | BUG_ON(i > part->len); |
232 | ctx->pos = (ctx->pos & PART_MASK) | i; | |
bf15ba7c MB |
233 | continue; |
234 | next: | |
235 | i += 8; | |
480e3e53 MB |
236 | } |
237 | return 1; | |
238 | } | |
239 | ||
240 | static int orangefs_dir_fill(struct orangefs_inode_s *oi, | |
241 | struct orangefs_dir *od, struct dentry *dentry, | |
242 | struct dir_context *ctx) | |
243 | { | |
244 | struct orangefs_dir_part *part; | |
245 | size_t count; | |
246 | ||
247 | count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; | |
248 | ||
249 | part = od->part; | |
250 | while (part->next && count) { | |
251 | count--; | |
252 | part = part->next; | |
253 | } | |
254 | /* This means the userspace file offset is invalid. */ | |
255 | if (count) { | |
256 | od->error = -EIO; | |
257 | return -EIO; | |
258 | } | |
259 | ||
260 | while (part && part->len) { | |
261 | int r; | |
262 | r = fill_from_part(part, ctx); | |
263 | if (r < 0) { | |
264 | od->error = r; | |
265 | return r; | |
266 | } else if (r == 0) { | |
267 | /* Userspace buffer is full. */ | |
268 | break; | |
269 | } else { | |
270 | /* | |
271 | * The part ran out of data. Move to the next | |
272 | * part. */ | |
273 | ctx->pos = (ctx->pos & PART_MASK) + | |
274 | (1 << PART_SHIFT); | |
275 | part = part->next; | |
276 | } | |
382f4581 | 277 | } |
382f4581 | 278 | return 0; |
382f4581 | 279 | } |
5db11c21 | 280 | |
942835d6 MB |
281 | static loff_t orangefs_dir_llseek(struct file *file, loff_t offset, |
282 | int whence) | |
283 | { | |
284 | struct orangefs_dir *od = file->private_data; | |
285 | /* | |
286 | * Delete the stored data so userspace sees new directory | |
287 | * entries. | |
288 | */ | |
289 | if (!whence && offset < od->end) { | |
290 | struct orangefs_dir_part *part = od->part; | |
291 | while (part) { | |
292 | struct orangefs_dir_part *next = part->next; | |
293 | vfree(part); | |
294 | part = next; | |
295 | } | |
296 | od->token = ORANGEFS_ITERATE_START; | |
297 | od->part = NULL; | |
298 | od->end = 1 << PART_SHIFT; | |
299 | } | |
300 | return default_llseek(file, offset, whence); | |
301 | } | |
302 | ||
382f4581 MB |
303 | static int orangefs_dir_iterate(struct file *file, |
304 | struct dir_context *ctx) | |
305 | { | |
306 | struct orangefs_inode_s *oi; | |
307 | struct orangefs_dir *od; | |
308 | struct dentry *dentry; | |
309 | int r; | |
5db11c21 | 310 | |
382f4581 MB |
311 | dentry = file->f_path.dentry; |
312 | oi = ORANGEFS_I(dentry->d_inode); | |
313 | od = file->private_data; | |
5db11c21 | 314 | |
382f4581 MB |
315 | if (od->error) |
316 | return od->error; | |
5db11c21 | 317 | |
382f4581 MB |
318 | if (ctx->pos == 0) { |
319 | if (!dir_emit_dot(file, ctx)) | |
320 | return 0; | |
321 | ctx->pos++; | |
5db11c21 | 322 | } |
382f4581 MB |
323 | if (ctx->pos == 1) { |
324 | if (!dir_emit_dotdot(file, ctx)) | |
325 | return 0; | |
480e3e53 | 326 | ctx->pos = 1 << PART_SHIFT; |
5db11c21 MM |
327 | } |
328 | ||
480e3e53 MB |
329 | /* |
330 | * The seek position is in the first synthesized part but is not | |
331 | * valid. | |
332 | */ | |
333 | if ((ctx->pos & PART_MASK) == 0) | |
334 | return -EIO; | |
335 | ||
382f4581 MB |
336 | r = 0; |
337 | ||
72f66b83 MB |
338 | /* |
339 | * Must read more if the user has sought past what has been read | |
340 | * so far. Stop a user who has sought past the end. | |
341 | */ | |
7b796ae3 | 342 | while (od->token != ORANGEFS_ITERATE_END && |
480e3e53 | 343 | ctx->pos > od->end) { |
72f66b83 MB |
344 | r = orangefs_dir_more(oi, od, dentry); |
345 | if (r) | |
346 | return r; | |
347 | } | |
7b796ae3 | 348 | if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) |
72f66b83 | 349 | return -EIO; |
72f66b83 MB |
350 | |
351 | /* Then try to fill if there's any left in the buffer. */ | |
480e3e53 | 352 | if (ctx->pos < od->end) { |
382f4581 MB |
353 | r = orangefs_dir_fill(oi, od, dentry, ctx); |
354 | if (r) | |
355 | return r; | |
5db11c21 MM |
356 | } |
357 | ||
72f66b83 | 358 | /* Finally get some more and try to fill. */ |
7b796ae3 | 359 | if (od->token != ORANGEFS_ITERATE_END) { |
382f4581 MB |
360 | r = orangefs_dir_more(oi, od, dentry); |
361 | if (r) | |
362 | return r; | |
363 | r = orangefs_dir_fill(oi, od, dentry, ctx); | |
5db11c21 MM |
364 | } |
365 | ||
382f4581 | 366 | return r; |
5db11c21 MM |
367 | } |
368 | ||
8bb8aefd | 369 | static int orangefs_dir_open(struct inode *inode, struct file *file) |
5db11c21 | 370 | { |
382f4581 MB |
371 | struct orangefs_dir *od; |
372 | file->private_data = kmalloc(sizeof(struct orangefs_dir), | |
373 | GFP_KERNEL); | |
5db11c21 MM |
374 | if (!file->private_data) |
375 | return -ENOMEM; | |
382f4581 | 376 | od = file->private_data; |
7b796ae3 | 377 | od->token = ORANGEFS_ITERATE_START; |
480e3e53 MB |
378 | od->part = NULL; |
379 | od->end = 1 << PART_SHIFT; | |
382f4581 | 380 | od->error = 0; |
5db11c21 MM |
381 | return 0; |
382 | } | |
383 | ||
8bb8aefd | 384 | static int orangefs_dir_release(struct inode *inode, struct file *file) |
5db11c21 | 385 | { |
382f4581 | 386 | struct orangefs_dir *od = file->private_data; |
480e3e53 | 387 | struct orangefs_dir_part *part = od->part; |
8bb8aefd | 388 | orangefs_flush_inode(inode); |
480e3e53 MB |
389 | while (part) { |
390 | struct orangefs_dir_part *next = part->next; | |
391 | vfree(part); | |
392 | part = next; | |
393 | } | |
382f4581 | 394 | kfree(od); |
5db11c21 MM |
395 | return 0; |
396 | } | |
397 | ||
8bb8aefd | 398 | const struct file_operations orangefs_dir_operations = { |
942835d6 | 399 | .llseek = orangefs_dir_llseek, |
5db11c21 | 400 | .read = generic_read_dir, |
382f4581 | 401 | .iterate = orangefs_dir_iterate, |
8bb8aefd | 402 | .open = orangefs_dir_open, |
382f4581 | 403 | .release = orangefs_dir_release |
5db11c21 | 404 | }; |