Commit | Line | Data |
---|---|---|
5db11c21 | 1 | /* |
382f4581 | 2 | * Copyright 2017 Omnibond Systems, L.L.C. |
5db11c21 MM |
3 | */ |
4 | ||
5 | #include "protocol.h" | |
575e9461 MM |
6 | #include "orangefs-kernel.h" |
7 | #include "orangefs-bufmap.h" | |
5db11c21 | 8 | |
480e3e53 MB |
/*
 * Header of one "part" of directory data.
 *
 * parse_readdir() overlays this structure on the start of a readdir
 * trailer buffer, and fill_from_part() reads the encoded entries starting
 * sizeof(struct orangefs_readdir_response_s) bytes from the part's own
 * address.  The field order is therefore kept exactly as it is.
 */
struct orangefs_dir_part {
	/* Next part in the singly linked list, or NULL for the last one. */
	struct orangefs_dir_part *next;
	/* Bytes of entry data in this part, excluding the response header. */
	size_t len;
};
13 | ||
14 | struct orangefs_dir { | |
15 | __u64 token; | |
16 | struct orangefs_dir_part *part; | |
17 | loff_t end; | |
18 | int error; | |
19 | }; | |
20 | ||
/*
 * Layout of a directory position: the bits at and above PART_SHIFT
 * select the part, the bits below it hold the byte offset in that part.
 */
#define PART_SHIFT	(24)
#define PART_SIZE	(1 << PART_SHIFT)
#define PART_MASK	(~(PART_SIZE - 1))
24 | ||
5db11c21 | 25 | /* |
382f4581 MB |
26 | * There can be up to 512 directory entries. Each entry is encoded as |
27 | * follows: | |
28 | * 4 bytes: string size (n) | |
29 | * n bytes: string | |
30 | * 1 byte: trailing zero | |
31 | * padding to 8 bytes | |
32 | * 16 bytes: khandle | |
33 | * padding to 8 bytes | |
382f4581 MB |
34 | * |
35 | * The trailer_buf starts with a struct orangefs_readdir_response_s | |
36 | * which must be skipped to get to the directory data. | |
480e3e53 MB |
37 | * |
38 | * The data which is received from the userspace daemon is termed a | |
39 | * part and is stored in a linked list in case more than one part is | |
40 | * needed for a large directory. | |
41 | * | |
42 | * The position pointer (ctx->pos) encodes the part and offset at which | |
43 | * to begin reading. Bits above PART_SHIFT encode the part and bits | |
44 | * below PART_SHIFT encode the offset. Parts are stored in a linked | |
45 | * list which grows as data is received from the server. The overhead | |
46 | * associated with managing the list is presumed to be small compared to | |
47 | * the overhead of communicating with the server. | |
48 | * | |
49 | * As data is received from the server, it is placed at the end of the | |
50 | * part list. Data is parsed from the current position as it is needed. | |
51 | * When data is determined to be corrupt, it is either because the | |
52 | * userspace component has sent back corrupt data or because the file | |
53 | * pointer has been moved to an invalid location. Since the two cannot | |
54 | * be differentiated, return EIO. | |
55 | * | |
56 | * Part zero is synthesized to contain `.' and `..'. Part one is the | |
57 | * first part of the part list. | |
5db11c21 | 58 | */ |
5db11c21 | 59 | |
480e3e53 MB |
60 | static int do_readdir(struct orangefs_inode_s *oi, |
61 | struct orangefs_dir *od, struct dentry *dentry, | |
62 | struct orangefs_kernel_op_s *op) | |
382f4581 | 63 | { |
382f4581 | 64 | struct orangefs_readdir_response_s *resp; |
382f4581 MB |
65 | int bufi, r; |
66 | ||
ee3b8d37 | 67 | /* |
382f4581 MB |
68 | * Despite the badly named field, readdir does not use shared |
69 | * memory. However, there are a limited number of readdir | |
70 | * slots, which must be allocated here. This flag simply tells | |
71 | * the op scheduler to return the op here for retry. | |
ee3b8d37 | 72 | */ |
382f4581 MB |
73 | op->uses_shared_memory = 1; |
74 | op->upcall.req.readdir.refn = oi->refn; | |
75 | op->upcall.req.readdir.token = od->token; | |
76 | op->upcall.req.readdir.max_dirent_count = | |
7d221485 | 77 | ORANGEFS_MAX_DIRENT_COUNT_READDIR; |
5db11c21 | 78 | |
382f4581 MB |
79 | again: |
80 | bufi = orangefs_readdir_index_get(); | |
81 | if (bufi < 0) { | |
382f4581 MB |
82 | od->error = bufi; |
83 | return bufi; | |
5db11c21 | 84 | } |
5db11c21 | 85 | |
382f4581 | 86 | op->upcall.req.readdir.buf_index = bufi; |
5db11c21 | 87 | |
382f4581 MB |
88 | r = service_operation(op, "orangefs_readdir", |
89 | get_interruptible_flag(dentry->d_inode)); | |
5db11c21 | 90 | |
382f4581 | 91 | orangefs_readdir_index_put(bufi); |
ee3b8d37 | 92 | |
382f4581 MB |
93 | if (op_state_purged(op)) { |
94 | if (r == -EAGAIN) { | |
95 | vfree(op->downcall.trailer_buf); | |
96 | goto again; | |
97 | } else if (r == -EIO) { | |
98 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
99 | od->error = r; |
100 | return r; | |
101 | } | |
5db11c21 MM |
102 | } |
103 | ||
382f4581 MB |
104 | if (r < 0) { |
105 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
106 | od->error = r; |
107 | return r; | |
108 | } else if (op->downcall.status) { | |
109 | vfree(op->downcall.trailer_buf); | |
382f4581 MB |
110 | od->error = op->downcall.status; |
111 | return op->downcall.status; | |
112 | } | |
113 | ||
480e3e53 MB |
114 | /* |
115 | * The maximum size is size per entry times the 512 entries plus | |
116 | * the header. This is well under the limit. | |
117 | */ | |
118 | if (op->downcall.trailer_size > PART_SIZE) { | |
119 | vfree(op->downcall.trailer_buf); | |
120 | od->error = -EIO; | |
121 | return -EIO; | |
122 | } | |
123 | ||
382f4581 MB |
124 | resp = (struct orangefs_readdir_response_s *) |
125 | op->downcall.trailer_buf; | |
126 | od->token = resp->token; | |
480e3e53 MB |
127 | return 0; |
128 | } | |
382f4581 | 129 | |
480e3e53 MB |
130 | static int parse_readdir(struct orangefs_dir *od, |
131 | struct orangefs_kernel_op_s *op) | |
132 | { | |
133 | struct orangefs_dir_part *part, *new; | |
134 | size_t count; | |
135 | ||
136 | count = 1; | |
137 | part = od->part; | |
138 | while (part && part->next) { | |
139 | part = part->next; | |
140 | count++; | |
382f4581 MB |
141 | } |
142 | ||
480e3e53 MB |
143 | new = (void *)op->downcall.trailer_buf; |
144 | new->next = NULL; | |
145 | new->len = op->downcall.trailer_size - | |
146 | sizeof(struct orangefs_readdir_response_s); | |
147 | if (!od->part) | |
148 | od->part = new; | |
149 | else | |
150 | part->next = new; | |
151 | count++; | |
152 | od->end = count << PART_SHIFT; | |
153 | ||
382f4581 MB |
154 | return 0; |
155 | } | |
9f5e2f7f | 156 | |
480e3e53 MB |
157 | static int orangefs_dir_more(struct orangefs_inode_s *oi, |
158 | struct orangefs_dir *od, struct dentry *dentry) | |
159 | { | |
160 | struct orangefs_kernel_op_s *op; | |
161 | int r; | |
162 | ||
163 | op = op_alloc(ORANGEFS_VFS_OP_READDIR); | |
164 | if (!op) { | |
165 | od->error = -ENOMEM; | |
166 | return -ENOMEM; | |
167 | } | |
168 | r = do_readdir(oi, od, dentry, op); | |
169 | if (r) { | |
170 | od->error = r; | |
171 | goto out; | |
172 | } | |
173 | r = parse_readdir(od, op); | |
174 | if (r) { | |
175 | od->error = r; | |
176 | goto out; | |
177 | } | |
178 | ||
179 | od->error = 0; | |
180 | out: | |
181 | op_release(op); | |
182 | return od->error; | |
183 | } | |
184 | ||
185 | static int fill_from_part(struct orangefs_dir_part *part, | |
382f4581 MB |
186 | struct dir_context *ctx) |
187 | { | |
480e3e53 | 188 | const int offset = sizeof(struct orangefs_readdir_response_s); |
382f4581 MB |
189 | struct orangefs_khandle *khandle; |
190 | __u32 *len, padlen; | |
72f66b83 | 191 | loff_t i; |
382f4581 | 192 | char *s; |
480e3e53 MB |
193 | i = ctx->pos & ~PART_MASK; |
194 | ||
195 | /* The file offset from userspace is too large. */ | |
196 | if (i > part->len) | |
197 | return -EIO; | |
198 | ||
199 | while (i < part->len) { | |
200 | if (part->len < i + sizeof *len) | |
201 | return -EIO; | |
202 | len = (void *)part + offset + i; | |
382f4581 MB |
203 | /* |
204 | * len is the size of the string itself. padlen is the | |
205 | * total size of the encoded string. | |
206 | */ | |
207 | padlen = (sizeof *len + *len + 1) + | |
480e3e53 MB |
208 | (8 - (sizeof *len + *len + 1)%8)%8; |
209 | if (part->len < i + padlen + sizeof *khandle) | |
210 | return -EIO; | |
211 | s = (void *)part + offset + i + sizeof *len; | |
382f4581 | 212 | if (s[*len] != 0) |
480e3e53 MB |
213 | return -EIO; |
214 | khandle = (void *)part + offset + i + padlen; | |
382f4581 | 215 | if (!dir_emit(ctx, s, *len, |
480e3e53 MB |
216 | orangefs_khandle_to_ino(khandle), |
217 | DT_UNKNOWN)) | |
382f4581 | 218 | return 0; |
72f66b83 MB |
219 | i += padlen + sizeof *khandle; |
220 | i = i + (8 - i%8)%8; | |
480e3e53 MB |
221 | BUG_ON(i > part->len); |
222 | ctx->pos = (ctx->pos & PART_MASK) | i; | |
223 | } | |
224 | return 1; | |
225 | } | |
226 | ||
227 | static int orangefs_dir_fill(struct orangefs_inode_s *oi, | |
228 | struct orangefs_dir *od, struct dentry *dentry, | |
229 | struct dir_context *ctx) | |
230 | { | |
231 | struct orangefs_dir_part *part; | |
232 | size_t count; | |
233 | ||
234 | count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; | |
235 | ||
236 | part = od->part; | |
237 | while (part->next && count) { | |
238 | count--; | |
239 | part = part->next; | |
240 | } | |
241 | /* This means the userspace file offset is invalid. */ | |
242 | if (count) { | |
243 | od->error = -EIO; | |
244 | return -EIO; | |
245 | } | |
246 | ||
247 | while (part && part->len) { | |
248 | int r; | |
249 | r = fill_from_part(part, ctx); | |
250 | if (r < 0) { | |
251 | od->error = r; | |
252 | return r; | |
253 | } else if (r == 0) { | |
254 | /* Userspace buffer is full. */ | |
255 | break; | |
256 | } else { | |
257 | /* | |
258 | * The part ran out of data. Move to the next | |
259 | * part. */ | |
260 | ctx->pos = (ctx->pos & PART_MASK) + | |
261 | (1 << PART_SHIFT); | |
262 | part = part->next; | |
263 | } | |
382f4581 | 264 | } |
382f4581 | 265 | return 0; |
382f4581 | 266 | } |
5db11c21 | 267 | |
382f4581 MB |
268 | static int orangefs_dir_iterate(struct file *file, |
269 | struct dir_context *ctx) | |
270 | { | |
271 | struct orangefs_inode_s *oi; | |
272 | struct orangefs_dir *od; | |
273 | struct dentry *dentry; | |
274 | int r; | |
5db11c21 | 275 | |
382f4581 MB |
276 | dentry = file->f_path.dentry; |
277 | oi = ORANGEFS_I(dentry->d_inode); | |
278 | od = file->private_data; | |
5db11c21 | 279 | |
382f4581 MB |
280 | if (od->error) |
281 | return od->error; | |
5db11c21 | 282 | |
382f4581 MB |
283 | if (ctx->pos == 0) { |
284 | if (!dir_emit_dot(file, ctx)) | |
285 | return 0; | |
286 | ctx->pos++; | |
5db11c21 | 287 | } |
382f4581 MB |
288 | if (ctx->pos == 1) { |
289 | if (!dir_emit_dotdot(file, ctx)) | |
290 | return 0; | |
480e3e53 | 291 | ctx->pos = 1 << PART_SHIFT; |
5db11c21 MM |
292 | } |
293 | ||
480e3e53 MB |
294 | /* |
295 | * The seek position is in the first synthesized part but is not | |
296 | * valid. | |
297 | */ | |
298 | if ((ctx->pos & PART_MASK) == 0) | |
299 | return -EIO; | |
300 | ||
382f4581 MB |
301 | r = 0; |
302 | ||
72f66b83 MB |
303 | /* |
304 | * Must read more if the user has sought past what has been read | |
305 | * so far. Stop a user who has sought past the end. | |
306 | */ | |
7b796ae3 | 307 | while (od->token != ORANGEFS_ITERATE_END && |
480e3e53 | 308 | ctx->pos > od->end) { |
72f66b83 MB |
309 | r = orangefs_dir_more(oi, od, dentry); |
310 | if (r) | |
311 | return r; | |
312 | } | |
7b796ae3 | 313 | if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end) |
72f66b83 | 314 | return -EIO; |
72f66b83 MB |
315 | |
316 | /* Then try to fill if there's any left in the buffer. */ | |
480e3e53 | 317 | if (ctx->pos < od->end) { |
382f4581 MB |
318 | r = orangefs_dir_fill(oi, od, dentry, ctx); |
319 | if (r) | |
320 | return r; | |
5db11c21 MM |
321 | } |
322 | ||
72f66b83 | 323 | /* Finally get some more and try to fill. */ |
7b796ae3 | 324 | if (od->token != ORANGEFS_ITERATE_END) { |
382f4581 MB |
325 | r = orangefs_dir_more(oi, od, dentry); |
326 | if (r) | |
327 | return r; | |
328 | r = orangefs_dir_fill(oi, od, dentry, ctx); | |
5db11c21 MM |
329 | } |
330 | ||
382f4581 | 331 | return r; |
5db11c21 MM |
332 | } |
333 | ||
8bb8aefd | 334 | static int orangefs_dir_open(struct inode *inode, struct file *file) |
5db11c21 | 335 | { |
382f4581 MB |
336 | struct orangefs_dir *od; |
337 | file->private_data = kmalloc(sizeof(struct orangefs_dir), | |
338 | GFP_KERNEL); | |
5db11c21 MM |
339 | if (!file->private_data) |
340 | return -ENOMEM; | |
382f4581 | 341 | od = file->private_data; |
7b796ae3 | 342 | od->token = ORANGEFS_ITERATE_START; |
480e3e53 MB |
343 | od->part = NULL; |
344 | od->end = 1 << PART_SHIFT; | |
382f4581 | 345 | od->error = 0; |
5db11c21 MM |
346 | return 0; |
347 | } | |
348 | ||
8bb8aefd | 349 | static int orangefs_dir_release(struct inode *inode, struct file *file) |
5db11c21 | 350 | { |
382f4581 | 351 | struct orangefs_dir *od = file->private_data; |
480e3e53 | 352 | struct orangefs_dir_part *part = od->part; |
8bb8aefd | 353 | orangefs_flush_inode(inode); |
480e3e53 MB |
354 | while (part) { |
355 | struct orangefs_dir_part *next = part->next; | |
356 | vfree(part); | |
357 | part = next; | |
358 | } | |
382f4581 | 359 | kfree(od); |
5db11c21 MM |
360 | return 0; |
361 | } | |
362 | ||
8bb8aefd | 363 | const struct file_operations orangefs_dir_operations = { |
72f66b83 | 364 | .llseek = default_llseek, |
5db11c21 | 365 | .read = generic_read_dir, |
382f4581 | 366 | .iterate = orangefs_dir_iterate, |
8bb8aefd | 367 | .open = orangefs_dir_open, |
382f4581 | 368 | .release = orangefs_dir_release |
5db11c21 | 369 | }; |