orangefs: handle zero size write in debugfs
[linux-block.git] / fs / orangefs / dir.c
CommitLineData
5db11c21 1/*
382f4581 2 * Copyright 2017 Omnibond Systems, L.L.C.
5db11c21
MM
3 */
4
5#include "protocol.h"
575e9461
MM
6#include "orangefs-kernel.h"
7#include "orangefs-bufmap.h"
5db11c21 8
480e3e53
MB
/*
 * Header of one chunk ("part") of directory data.  parse_readdir() lays
 * this structure over the start of a readdir trailer buffer; the encoded
 * directory entries are read from the same allocation, starting
 * sizeof(struct orangefs_readdir_response_s) bytes from its beginning.
 */
struct orangefs_dir_part {
	/* Following part in the singly linked list, NULL for the last one. */
	struct orangefs_dir_part *next;
	/* Trailer size minus sizeof(struct orangefs_readdir_response_s). */
	size_t len;
};
13
14struct orangefs_dir {
15 __u64 token;
16 struct orangefs_dir_part *part;
17 loff_t end;
18 int error;
19};
20
/*
 * Layout of the directory file position: the part number lives in the
 * bits from PART_SHIFT upward, the byte offset inside that part in the
 * bits below it.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
24
5db11c21 25/*
382f4581
MB
26 * There can be up to 512 directory entries. Each entry is encoded as
27 * follows:
28 * 4 bytes: string size (n)
29 * n bytes: string
30 * 1 byte: trailing zero
31 * padding to 8 bytes
32 * 16 bytes: khandle
33 * padding to 8 bytes
382f4581
MB
34 *
35 * The trailer_buf starts with a struct orangefs_readdir_response_s
36 * which must be skipped to get to the directory data.
480e3e53
MB
37 *
38 * The data which is received from the userspace daemon is termed a
39 * part and is stored in a linked list in case more than one part is
40 * needed for a large directory.
41 *
42 * The position pointer (ctx->pos) encodes the part and offset on which
43 * to begin reading at. Bits above PART_SHIFT encode the part and bits
44 * below PART_SHIFT encode the offset. Parts are stored in a linked
45 * list which grows as data is received from the server. The overhead
46 * associated with managing the list is presumed to be small compared to
47 * the overhead of communicating with the server.
48 *
49 * As data is received from the server, it is placed at the end of the
50 * part list. Data is parsed from the current position as it is needed.
51 * When data is determined to be corrupt, it is either because the
52 * userspace component has sent back corrupt data or because the file
53 * pointer has been moved to an invalid location. Since the two cannot
54 * be differentiated, return EIO.
55 *
56 * Part zero is synthesized to contain `.' and `..'. Part one is the
57 * first part of the part list.
5db11c21 58 */
5db11c21 59
480e3e53
MB
60static int do_readdir(struct orangefs_inode_s *oi,
61 struct orangefs_dir *od, struct dentry *dentry,
62 struct orangefs_kernel_op_s *op)
382f4581 63{
382f4581 64 struct orangefs_readdir_response_s *resp;
382f4581
MB
65 int bufi, r;
66
ee3b8d37 67 /*
382f4581
MB
68 * Despite the badly named field, readdir does not use shared
69 * memory. However, there are a limited number of readdir
70 * slots, which must be allocated here. This flag simply tells
71 * the op scheduler to return the op here for retry.
ee3b8d37 72 */
382f4581
MB
73 op->uses_shared_memory = 1;
74 op->upcall.req.readdir.refn = oi->refn;
75 op->upcall.req.readdir.token = od->token;
76 op->upcall.req.readdir.max_dirent_count =
7d221485 77 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
5db11c21 78
382f4581
MB
79again:
80 bufi = orangefs_readdir_index_get();
81 if (bufi < 0) {
382f4581
MB
82 od->error = bufi;
83 return bufi;
5db11c21 84 }
5db11c21 85
382f4581 86 op->upcall.req.readdir.buf_index = bufi;
5db11c21 87
382f4581
MB
88 r = service_operation(op, "orangefs_readdir",
89 get_interruptible_flag(dentry->d_inode));
5db11c21 90
382f4581 91 orangefs_readdir_index_put(bufi);
ee3b8d37 92
382f4581
MB
93 if (op_state_purged(op)) {
94 if (r == -EAGAIN) {
95 vfree(op->downcall.trailer_buf);
96 goto again;
97 } else if (r == -EIO) {
98 vfree(op->downcall.trailer_buf);
382f4581
MB
99 od->error = r;
100 return r;
101 }
5db11c21
MM
102 }
103
382f4581
MB
104 if (r < 0) {
105 vfree(op->downcall.trailer_buf);
382f4581
MB
106 od->error = r;
107 return r;
108 } else if (op->downcall.status) {
109 vfree(op->downcall.trailer_buf);
382f4581
MB
110 od->error = op->downcall.status;
111 return op->downcall.status;
112 }
113
480e3e53
MB
114 /*
115 * The maximum size is size per entry times the 512 entries plus
116 * the header. This is well under the limit.
117 */
118 if (op->downcall.trailer_size > PART_SIZE) {
119 vfree(op->downcall.trailer_buf);
120 od->error = -EIO;
121 return -EIO;
122 }
123
382f4581
MB
124 resp = (struct orangefs_readdir_response_s *)
125 op->downcall.trailer_buf;
126 od->token = resp->token;
480e3e53
MB
127 return 0;
128}
382f4581 129
480e3e53
MB
130static int parse_readdir(struct orangefs_dir *od,
131 struct orangefs_kernel_op_s *op)
132{
133 struct orangefs_dir_part *part, *new;
134 size_t count;
135
136 count = 1;
137 part = od->part;
138 while (part && part->next) {
139 part = part->next;
140 count++;
382f4581
MB
141 }
142
480e3e53
MB
143 new = (void *)op->downcall.trailer_buf;
144 new->next = NULL;
145 new->len = op->downcall.trailer_size -
146 sizeof(struct orangefs_readdir_response_s);
147 if (!od->part)
148 od->part = new;
149 else
150 part->next = new;
151 count++;
152 od->end = count << PART_SHIFT;
153
382f4581
MB
154 return 0;
155}
9f5e2f7f 156
480e3e53
MB
157static int orangefs_dir_more(struct orangefs_inode_s *oi,
158 struct orangefs_dir *od, struct dentry *dentry)
159{
160 struct orangefs_kernel_op_s *op;
161 int r;
162
163 op = op_alloc(ORANGEFS_VFS_OP_READDIR);
164 if (!op) {
165 od->error = -ENOMEM;
166 return -ENOMEM;
167 }
168 r = do_readdir(oi, od, dentry, op);
169 if (r) {
170 od->error = r;
171 goto out;
172 }
173 r = parse_readdir(od, op);
174 if (r) {
175 od->error = r;
176 goto out;
177 }
178
179 od->error = 0;
180out:
181 op_release(op);
182 return od->error;
183}
184
185static int fill_from_part(struct orangefs_dir_part *part,
382f4581
MB
186 struct dir_context *ctx)
187{
480e3e53 188 const int offset = sizeof(struct orangefs_readdir_response_s);
382f4581
MB
189 struct orangefs_khandle *khandle;
190 __u32 *len, padlen;
72f66b83 191 loff_t i;
382f4581 192 char *s;
480e3e53
MB
193 i = ctx->pos & ~PART_MASK;
194
195 /* The file offset from userspace is too large. */
196 if (i > part->len)
197 return -EIO;
198
199 while (i < part->len) {
200 if (part->len < i + sizeof *len)
201 return -EIO;
202 len = (void *)part + offset + i;
382f4581
MB
203 /*
204 * len is the size of the string itself. padlen is the
205 * total size of the encoded string.
206 */
207 padlen = (sizeof *len + *len + 1) +
480e3e53
MB
208 (8 - (sizeof *len + *len + 1)%8)%8;
209 if (part->len < i + padlen + sizeof *khandle)
210 return -EIO;
211 s = (void *)part + offset + i + sizeof *len;
382f4581 212 if (s[*len] != 0)
480e3e53
MB
213 return -EIO;
214 khandle = (void *)part + offset + i + padlen;
382f4581 215 if (!dir_emit(ctx, s, *len,
480e3e53
MB
216 orangefs_khandle_to_ino(khandle),
217 DT_UNKNOWN))
382f4581 218 return 0;
72f66b83
MB
219 i += padlen + sizeof *khandle;
220 i = i + (8 - i%8)%8;
480e3e53
MB
221 BUG_ON(i > part->len);
222 ctx->pos = (ctx->pos & PART_MASK) | i;
223 }
224 return 1;
225}
226
227static int orangefs_dir_fill(struct orangefs_inode_s *oi,
228 struct orangefs_dir *od, struct dentry *dentry,
229 struct dir_context *ctx)
230{
231 struct orangefs_dir_part *part;
232 size_t count;
233
234 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
235
236 part = od->part;
237 while (part->next && count) {
238 count--;
239 part = part->next;
240 }
241 /* This means the userspace file offset is invalid. */
242 if (count) {
243 od->error = -EIO;
244 return -EIO;
245 }
246
247 while (part && part->len) {
248 int r;
249 r = fill_from_part(part, ctx);
250 if (r < 0) {
251 od->error = r;
252 return r;
253 } else if (r == 0) {
254 /* Userspace buffer is full. */
255 break;
256 } else {
257 /*
258 * The part ran out of data. Move to the next
259 * part. */
260 ctx->pos = (ctx->pos & PART_MASK) +
261 (1 << PART_SHIFT);
262 part = part->next;
263 }
382f4581 264 }
382f4581 265 return 0;
382f4581 266}
5db11c21 267
382f4581
MB
268static int orangefs_dir_iterate(struct file *file,
269 struct dir_context *ctx)
270{
271 struct orangefs_inode_s *oi;
272 struct orangefs_dir *od;
273 struct dentry *dentry;
274 int r;
5db11c21 275
382f4581
MB
276 dentry = file->f_path.dentry;
277 oi = ORANGEFS_I(dentry->d_inode);
278 od = file->private_data;
5db11c21 279
382f4581
MB
280 if (od->error)
281 return od->error;
5db11c21 282
382f4581
MB
283 if (ctx->pos == 0) {
284 if (!dir_emit_dot(file, ctx))
285 return 0;
286 ctx->pos++;
5db11c21 287 }
382f4581
MB
288 if (ctx->pos == 1) {
289 if (!dir_emit_dotdot(file, ctx))
290 return 0;
480e3e53 291 ctx->pos = 1 << PART_SHIFT;
5db11c21
MM
292 }
293
480e3e53
MB
294 /*
295 * The seek position is in the first synthesized part but is not
296 * valid.
297 */
298 if ((ctx->pos & PART_MASK) == 0)
299 return -EIO;
300
382f4581
MB
301 r = 0;
302
72f66b83
MB
303 /*
304 * Must read more if the user has sought past what has been read
305 * so far. Stop a user who has sought past the end.
306 */
7b796ae3 307 while (od->token != ORANGEFS_ITERATE_END &&
480e3e53 308 ctx->pos > od->end) {
72f66b83
MB
309 r = orangefs_dir_more(oi, od, dentry);
310 if (r)
311 return r;
312 }
7b796ae3 313 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
72f66b83 314 return -EIO;
72f66b83
MB
315
316 /* Then try to fill if there's any left in the buffer. */
480e3e53 317 if (ctx->pos < od->end) {
382f4581
MB
318 r = orangefs_dir_fill(oi, od, dentry, ctx);
319 if (r)
320 return r;
5db11c21
MM
321 }
322
72f66b83 323 /* Finally get some more and try to fill. */
7b796ae3 324 if (od->token != ORANGEFS_ITERATE_END) {
382f4581
MB
325 r = orangefs_dir_more(oi, od, dentry);
326 if (r)
327 return r;
328 r = orangefs_dir_fill(oi, od, dentry, ctx);
5db11c21
MM
329 }
330
382f4581 331 return r;
5db11c21
MM
332}
333
8bb8aefd 334static int orangefs_dir_open(struct inode *inode, struct file *file)
5db11c21 335{
382f4581
MB
336 struct orangefs_dir *od;
337 file->private_data = kmalloc(sizeof(struct orangefs_dir),
338 GFP_KERNEL);
5db11c21
MM
339 if (!file->private_data)
340 return -ENOMEM;
382f4581 341 od = file->private_data;
7b796ae3 342 od->token = ORANGEFS_ITERATE_START;
480e3e53
MB
343 od->part = NULL;
344 od->end = 1 << PART_SHIFT;
382f4581 345 od->error = 0;
5db11c21
MM
346 return 0;
347}
348
8bb8aefd 349static int orangefs_dir_release(struct inode *inode, struct file *file)
5db11c21 350{
382f4581 351 struct orangefs_dir *od = file->private_data;
480e3e53 352 struct orangefs_dir_part *part = od->part;
8bb8aefd 353 orangefs_flush_inode(inode);
480e3e53
MB
354 while (part) {
355 struct orangefs_dir_part *next = part->next;
356 vfree(part);
357 part = next;
358 }
382f4581 359 kfree(od);
5db11c21
MM
360 return 0;
361}
362
8bb8aefd 363const struct file_operations orangefs_dir_operations = {
72f66b83 364 .llseek = default_llseek,
5db11c21 365 .read = generic_read_dir,
382f4581 366 .iterate = orangefs_dir_iterate,
8bb8aefd 367 .open = orangefs_dir_open,
382f4581 368 .release = orangefs_dir_release
5db11c21 369};