Commit | Line | Data |
---|---|---|
334f485d MS |
1 | /* |
2 | FUSE: Filesystem in Userspace | |
d7133114 | 3 | Copyright (C) 2001-2006 Miklos Szeredi <miklos@szeredi.hu> |
334f485d MS |
4 | |
5 | This program can be distributed under the terms of the GNU GPL. | |
6 | See the file COPYING. | |
7 | */ | |
8 | ||
9 | #include "fuse_i.h" | |
10 | ||
11 | #include <linux/init.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/poll.h> | |
14 | #include <linux/uio.h> | |
15 | #include <linux/miscdevice.h> | |
16 | #include <linux/pagemap.h> | |
17 | #include <linux/file.h> | |
18 | #include <linux/slab.h> | |
19 | ||
MODULE_ALIAS_MISCDEV(FUSE_MINOR);

/* Slab cache that fuse_request_alloc() and fuse_request_free() operate on */
static struct kmem_cache *fuse_req_cachep;
334f485d | 23 | |
8bfc016d | 24 | static struct fuse_conn *fuse_get_conn(struct file *file) |
334f485d | 25 | { |
0720b315 MS |
26 | /* |
27 | * Lockless access is OK, because file->private data is set | |
28 | * once during mount and is valid until the file is released. | |
29 | */ | |
30 | return file->private_data; | |
334f485d MS |
31 | } |
32 | ||
8bfc016d | 33 | static void fuse_request_init(struct fuse_req *req) |
334f485d MS |
34 | { |
35 | memset(req, 0, sizeof(*req)); | |
36 | INIT_LIST_HEAD(&req->list); | |
a4d27e75 | 37 | INIT_LIST_HEAD(&req->intr_entry); |
334f485d MS |
38 | init_waitqueue_head(&req->waitq); |
39 | atomic_set(&req->count, 1); | |
40 | } | |
41 | ||
42 | struct fuse_req *fuse_request_alloc(void) | |
43 | { | |
e94b1766 | 44 | struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); |
334f485d MS |
45 | if (req) |
46 | fuse_request_init(req); | |
47 | return req; | |
48 | } | |
49 | ||
50 | void fuse_request_free(struct fuse_req *req) | |
51 | { | |
52 | kmem_cache_free(fuse_req_cachep, req); | |
53 | } | |
54 | ||
8bfc016d | 55 | static void block_sigs(sigset_t *oldset) |
334f485d MS |
56 | { |
57 | sigset_t mask; | |
58 | ||
59 | siginitsetinv(&mask, sigmask(SIGKILL)); | |
60 | sigprocmask(SIG_BLOCK, &mask, oldset); | |
61 | } | |
62 | ||
8bfc016d | 63 | static void restore_sigs(sigset_t *oldset) |
334f485d MS |
64 | { |
65 | sigprocmask(SIG_SETMASK, oldset, NULL); | |
66 | } | |
67 | ||
334f485d MS |
68 | static void __fuse_get_request(struct fuse_req *req) |
69 | { | |
70 | atomic_inc(&req->count); | |
71 | } | |
72 | ||
73 | /* Must be called with > 1 refcount */ | |
74 | static void __fuse_put_request(struct fuse_req *req) | |
75 | { | |
76 | BUG_ON(atomic_read(&req->count) < 2); | |
77 | atomic_dec(&req->count); | |
78 | } | |
79 | ||
33649c91 MS |
80 | static void fuse_req_init_context(struct fuse_req *req) |
81 | { | |
82 | req->in.h.uid = current->fsuid; | |
83 | req->in.h.gid = current->fsgid; | |
84 | req->in.h.pid = current->pid; | |
85 | } | |
86 | ||
ce1d5a49 | 87 | struct fuse_req *fuse_get_req(struct fuse_conn *fc) |
334f485d | 88 | { |
08a53cdc MS |
89 | struct fuse_req *req; |
90 | sigset_t oldset; | |
9bc5ddda | 91 | int intr; |
08a53cdc MS |
92 | int err; |
93 | ||
9bc5ddda | 94 | atomic_inc(&fc->num_waiting); |
08a53cdc | 95 | block_sigs(&oldset); |
9bc5ddda | 96 | intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked); |
08a53cdc | 97 | restore_sigs(&oldset); |
9bc5ddda MS |
98 | err = -EINTR; |
99 | if (intr) | |
100 | goto out; | |
08a53cdc | 101 | |
51eb01e7 MS |
102 | err = -ENOTCONN; |
103 | if (!fc->connected) | |
104 | goto out; | |
105 | ||
08a53cdc | 106 | req = fuse_request_alloc(); |
9bc5ddda | 107 | err = -ENOMEM; |
ce1d5a49 | 108 | if (!req) |
9bc5ddda | 109 | goto out; |
334f485d | 110 | |
33649c91 | 111 | fuse_req_init_context(req); |
9bc5ddda | 112 | req->waiting = 1; |
334f485d | 113 | return req; |
9bc5ddda MS |
114 | |
115 | out: | |
116 | atomic_dec(&fc->num_waiting); | |
117 | return ERR_PTR(err); | |
334f485d MS |
118 | } |
119 | ||
33649c91 MS |
120 | /* |
121 | * Return request in fuse_file->reserved_req. However that may | |
122 | * currently be in use. If that is the case, wait for it to become | |
123 | * available. | |
124 | */ | |
125 | static struct fuse_req *get_reserved_req(struct fuse_conn *fc, | |
126 | struct file *file) | |
127 | { | |
128 | struct fuse_req *req = NULL; | |
129 | struct fuse_file *ff = file->private_data; | |
130 | ||
131 | do { | |
de5e3dec | 132 | wait_event(fc->reserved_req_waitq, ff->reserved_req); |
33649c91 MS |
133 | spin_lock(&fc->lock); |
134 | if (ff->reserved_req) { | |
135 | req = ff->reserved_req; | |
136 | ff->reserved_req = NULL; | |
137 | get_file(file); | |
138 | req->stolen_file = file; | |
139 | } | |
140 | spin_unlock(&fc->lock); | |
141 | } while (!req); | |
142 | ||
143 | return req; | |
144 | } | |
145 | ||
146 | /* | |
147 | * Put stolen request back into fuse_file->reserved_req | |
148 | */ | |
149 | static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) | |
150 | { | |
151 | struct file *file = req->stolen_file; | |
152 | struct fuse_file *ff = file->private_data; | |
153 | ||
154 | spin_lock(&fc->lock); | |
155 | fuse_request_init(req); | |
156 | BUG_ON(ff->reserved_req); | |
157 | ff->reserved_req = req; | |
de5e3dec | 158 | wake_up_all(&fc->reserved_req_waitq); |
33649c91 MS |
159 | spin_unlock(&fc->lock); |
160 | fput(file); | |
161 | } | |
162 | ||
163 | /* | |
164 | * Gets a requests for a file operation, always succeeds | |
165 | * | |
166 | * This is used for sending the FLUSH request, which must get to | |
167 | * userspace, due to POSIX locks which may need to be unlocked. | |
168 | * | |
169 | * If allocation fails due to OOM, use the reserved request in | |
170 | * fuse_file. | |
171 | * | |
172 | * This is very unlikely to deadlock accidentally, since the | |
173 | * filesystem should not have it's own file open. If deadlock is | |
174 | * intentional, it can still be broken by "aborting" the filesystem. | |
175 | */ | |
176 | struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) | |
177 | { | |
178 | struct fuse_req *req; | |
179 | ||
180 | atomic_inc(&fc->num_waiting); | |
181 | wait_event(fc->blocked_waitq, !fc->blocked); | |
182 | req = fuse_request_alloc(); | |
183 | if (!req) | |
184 | req = get_reserved_req(fc, file); | |
185 | ||
186 | fuse_req_init_context(req); | |
187 | req->waiting = 1; | |
188 | return req; | |
189 | } | |
190 | ||
334f485d | 191 | void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) |
7128ec2a MS |
192 | { |
193 | if (atomic_dec_and_test(&req->count)) { | |
9bc5ddda MS |
194 | if (req->waiting) |
195 | atomic_dec(&fc->num_waiting); | |
33649c91 MS |
196 | |
197 | if (req->stolen_file) | |
198 | put_reserved_req(fc, req); | |
199 | else | |
200 | fuse_request_free(req); | |
7128ec2a MS |
201 | } |
202 | } | |
203 | ||
334f485d MS |
204 | /* |
205 | * This function is called when a request is finished. Either a reply | |
f9a2842e | 206 | * has arrived or it was aborted (and not yet sent) or some error |
f43b155a | 207 | * occurred during communication with userspace, or the device file |
51eb01e7 MS |
208 | * was closed. The requester thread is woken up (if still waiting), |
209 | * the 'end' callback is called if given, else the reference to the | |
210 | * request is released | |
7128ec2a | 211 | * |
d7133114 | 212 | * Called with fc->lock, unlocks it |
334f485d MS |
213 | */ |
214 | static void request_end(struct fuse_conn *fc, struct fuse_req *req) | |
105f4d7a | 215 | __releases(fc->lock) |
334f485d | 216 | { |
51eb01e7 MS |
217 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; |
218 | req->end = NULL; | |
d77a1d5b | 219 | list_del(&req->list); |
a4d27e75 | 220 | list_del(&req->intr_entry); |
83cfd493 | 221 | req->state = FUSE_REQ_FINISHED; |
51eb01e7 MS |
222 | if (req->background) { |
223 | if (fc->num_background == FUSE_MAX_BACKGROUND) { | |
224 | fc->blocked = 0; | |
225 | wake_up_all(&fc->blocked_waitq); | |
226 | } | |
f92b99b9 MS |
227 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { |
228 | clear_bdi_congested(&fc->bdi, READ); | |
229 | clear_bdi_congested(&fc->bdi, WRITE); | |
230 | } | |
51eb01e7 | 231 | fc->num_background--; |
334f485d | 232 | } |
51eb01e7 | 233 | spin_unlock(&fc->lock); |
51eb01e7 MS |
234 | wake_up(&req->waitq); |
235 | if (end) | |
236 | end(fc, req); | |
237 | else | |
238 | fuse_put_request(fc, req); | |
334f485d MS |
239 | } |
240 | ||
a4d27e75 MS |
241 | static void wait_answer_interruptible(struct fuse_conn *fc, |
242 | struct fuse_req *req) | |
243 | { | |
244 | if (signal_pending(current)) | |
245 | return; | |
246 | ||
247 | spin_unlock(&fc->lock); | |
248 | wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); | |
249 | spin_lock(&fc->lock); | |
250 | } | |
251 | ||
252 | static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) | |
253 | { | |
254 | list_add_tail(&req->intr_entry, &fc->interrupts); | |
255 | wake_up(&fc->waitq); | |
256 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); | |
257 | } | |
258 | ||
d7133114 | 259 | /* Called with fc->lock held. Releases, and then reacquires it. */ |
7c352bdf | 260 | static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) |
334f485d | 261 | { |
a4d27e75 MS |
262 | if (!fc->no_interrupt) { |
263 | /* Any signal may interrupt this */ | |
264 | wait_answer_interruptible(fc, req); | |
334f485d | 265 | |
a4d27e75 MS |
266 | if (req->aborted) |
267 | goto aborted; | |
268 | if (req->state == FUSE_REQ_FINISHED) | |
269 | return; | |
270 | ||
271 | req->interrupted = 1; | |
272 | if (req->state == FUSE_REQ_SENT) | |
273 | queue_interrupt(fc, req); | |
274 | } | |
275 | ||
a131de0a | 276 | if (!req->force) { |
a4d27e75 MS |
277 | sigset_t oldset; |
278 | ||
279 | /* Only fatal signals may interrupt this */ | |
51eb01e7 | 280 | block_sigs(&oldset); |
a4d27e75 | 281 | wait_answer_interruptible(fc, req); |
51eb01e7 | 282 | restore_sigs(&oldset); |
a131de0a MS |
283 | |
284 | if (req->aborted) | |
285 | goto aborted; | |
286 | if (req->state == FUSE_REQ_FINISHED) | |
287 | return; | |
288 | ||
289 | /* Request is not yet in userspace, bail out */ | |
290 | if (req->state == FUSE_REQ_PENDING) { | |
291 | list_del(&req->list); | |
292 | __fuse_put_request(req); | |
293 | req->out.h.error = -EINTR; | |
294 | return; | |
295 | } | |
51eb01e7 | 296 | } |
334f485d | 297 | |
a131de0a MS |
298 | /* |
299 | * Either request is already in userspace, or it was forced. | |
300 | * Wait it out. | |
301 | */ | |
302 | spin_unlock(&fc->lock); | |
303 | wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); | |
304 | spin_lock(&fc->lock); | |
a4d27e75 | 305 | |
a131de0a MS |
306 | if (!req->aborted) |
307 | return; | |
a4d27e75 MS |
308 | |
309 | aborted: | |
a131de0a | 310 | BUG_ON(req->state != FUSE_REQ_FINISHED); |
334f485d MS |
311 | if (req->locked) { |
312 | /* This is uninterruptible sleep, because data is | |
313 | being copied to/from the buffers of req. During | |
314 | locked state, there mustn't be any filesystem | |
315 | operation (e.g. page fault), since that could lead | |
316 | to deadlock */ | |
d7133114 | 317 | spin_unlock(&fc->lock); |
334f485d | 318 | wait_event(req->waitq, !req->locked); |
d7133114 | 319 | spin_lock(&fc->lock); |
334f485d | 320 | } |
334f485d MS |
321 | } |
322 | ||
323 | static unsigned len_args(unsigned numargs, struct fuse_arg *args) | |
324 | { | |
325 | unsigned nbytes = 0; | |
326 | unsigned i; | |
327 | ||
328 | for (i = 0; i < numargs; i++) | |
329 | nbytes += args[i].size; | |
330 | ||
331 | return nbytes; | |
332 | } | |
333 | ||
a4d27e75 MS |
334 | static u64 fuse_get_unique(struct fuse_conn *fc) |
335 | { | |
336 | fc->reqctr++; | |
337 | /* zero is special */ | |
338 | if (fc->reqctr == 0) | |
339 | fc->reqctr = 1; | |
340 | ||
341 | return fc->reqctr; | |
342 | } | |
343 | ||
334f485d MS |
344 | static void queue_request(struct fuse_conn *fc, struct fuse_req *req) |
345 | { | |
a4d27e75 | 346 | req->in.h.unique = fuse_get_unique(fc); |
334f485d MS |
347 | req->in.h.len = sizeof(struct fuse_in_header) + |
348 | len_args(req->in.numargs, (struct fuse_arg *) req->in.args); | |
334f485d | 349 | list_add_tail(&req->list, &fc->pending); |
83cfd493 | 350 | req->state = FUSE_REQ_PENDING; |
9bc5ddda MS |
351 | if (!req->waiting) { |
352 | req->waiting = 1; | |
353 | atomic_inc(&fc->num_waiting); | |
354 | } | |
334f485d | 355 | wake_up(&fc->waitq); |
385a17bf | 356 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); |
334f485d MS |
357 | } |
358 | ||
/*
 * Send a request and wait for its answer.
 *
 * If the connection is down the request is not queued and
 * req->out.h.error is set to -ENOTCONN; if the connection is in an
 * error state it is set to -ECONNREFUSED.
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;

	spin_lock(&fc->lock);
	if (fc->connected && !fc->conn_error) {
		queue_request(fc, req);
		/*
		 * The request is still needed after request_end(), so
		 * hold an extra reference across the wait.
		 */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	} else {
		req->out.h.error = fc->connected ? -ECONNREFUSED : -ENOTCONN;
	}
	spin_unlock(&fc->lock);
}
377 | ||
334f485d MS |
378 | static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) |
379 | { | |
d7133114 | 380 | spin_lock(&fc->lock); |
1e9a4ed9 | 381 | if (fc->connected) { |
51eb01e7 MS |
382 | req->background = 1; |
383 | fc->num_background++; | |
384 | if (fc->num_background == FUSE_MAX_BACKGROUND) | |
385 | fc->blocked = 1; | |
f92b99b9 MS |
386 | if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { |
387 | set_bdi_congested(&fc->bdi, READ); | |
388 | set_bdi_congested(&fc->bdi, WRITE); | |
389 | } | |
51eb01e7 | 390 | |
334f485d | 391 | queue_request(fc, req); |
d7133114 | 392 | spin_unlock(&fc->lock); |
334f485d MS |
393 | } else { |
394 | req->out.h.error = -ENOTCONN; | |
395 | request_end(fc, req); | |
396 | } | |
397 | } | |
398 | ||
399 | void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req) | |
400 | { | |
401 | req->isreply = 0; | |
402 | request_send_nowait(fc, req); | |
403 | } | |
404 | ||
405 | void request_send_background(struct fuse_conn *fc, struct fuse_req *req) | |
406 | { | |
407 | req->isreply = 1; | |
334f485d MS |
408 | request_send_nowait(fc, req); |
409 | } | |
410 | ||
334f485d MS |
411 | /* |
412 | * Lock the request. Up to the next unlock_request() there mustn't be | |
413 | * anything that could cause a page-fault. If the request was already | |
f9a2842e | 414 | * aborted bail out. |
334f485d | 415 | */ |
d7133114 | 416 | static int lock_request(struct fuse_conn *fc, struct fuse_req *req) |
334f485d MS |
417 | { |
418 | int err = 0; | |
419 | if (req) { | |
d7133114 | 420 | spin_lock(&fc->lock); |
f9a2842e | 421 | if (req->aborted) |
334f485d MS |
422 | err = -ENOENT; |
423 | else | |
424 | req->locked = 1; | |
d7133114 | 425 | spin_unlock(&fc->lock); |
334f485d MS |
426 | } |
427 | return err; | |
428 | } | |
429 | ||
430 | /* | |
f9a2842e | 431 | * Unlock request. If it was aborted during being locked, the |
334f485d MS |
432 | * requester thread is currently waiting for it to be unlocked, so |
433 | * wake it up. | |
434 | */ | |
d7133114 | 435 | static void unlock_request(struct fuse_conn *fc, struct fuse_req *req) |
334f485d MS |
436 | { |
437 | if (req) { | |
d7133114 | 438 | spin_lock(&fc->lock); |
334f485d | 439 | req->locked = 0; |
f9a2842e | 440 | if (req->aborted) |
334f485d | 441 | wake_up(&req->waitq); |
d7133114 | 442 | spin_unlock(&fc->lock); |
334f485d MS |
443 | } |
444 | } | |
445 | ||
/* Cursor state for copying between a request and a userspace iovec */
struct fuse_copy_state {
	struct fuse_conn *fc;		/* connection whose lock guards req */
	int write;			/* nonzero: copy into the user buffer */
	struct fuse_req *req;		/* request locked while copying, or NULL */
	const struct iovec *iov;	/* next iovec segment to consume */
	unsigned long nr_segs;		/* segments left in iov */
	unsigned long seglen;		/* bytes left in the current segment */
	unsigned long addr;		/* user address of the next byte */
	struct page *pg;		/* user page currently pinned */
	void *mapaddr;			/* kernel mapping of pg, NULL if none */
	void *buf;			/* copy position inside the mapping */
	unsigned len;			/* bytes left at buf */
};
459 | ||
d7133114 MS |
460 | static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc, |
461 | int write, struct fuse_req *req, | |
462 | const struct iovec *iov, unsigned long nr_segs) | |
334f485d MS |
463 | { |
464 | memset(cs, 0, sizeof(*cs)); | |
d7133114 | 465 | cs->fc = fc; |
334f485d MS |
466 | cs->write = write; |
467 | cs->req = req; | |
468 | cs->iov = iov; | |
469 | cs->nr_segs = nr_segs; | |
470 | } | |
471 | ||
472 | /* Unmap and put previous page of userspace buffer */ | |
8bfc016d | 473 | static void fuse_copy_finish(struct fuse_copy_state *cs) |
334f485d MS |
474 | { |
475 | if (cs->mapaddr) { | |
476 | kunmap_atomic(cs->mapaddr, KM_USER0); | |
477 | if (cs->write) { | |
478 | flush_dcache_page(cs->pg); | |
479 | set_page_dirty_lock(cs->pg); | |
480 | } | |
481 | put_page(cs->pg); | |
482 | cs->mapaddr = NULL; | |
483 | } | |
484 | } | |
485 | ||
486 | /* | |
487 | * Get another pagefull of userspace buffer, and map it to kernel | |
488 | * address space, and lock request | |
489 | */ | |
490 | static int fuse_copy_fill(struct fuse_copy_state *cs) | |
491 | { | |
492 | unsigned long offset; | |
493 | int err; | |
494 | ||
d7133114 | 495 | unlock_request(cs->fc, cs->req); |
334f485d MS |
496 | fuse_copy_finish(cs); |
497 | if (!cs->seglen) { | |
498 | BUG_ON(!cs->nr_segs); | |
499 | cs->seglen = cs->iov[0].iov_len; | |
500 | cs->addr = (unsigned long) cs->iov[0].iov_base; | |
501 | cs->iov ++; | |
502 | cs->nr_segs --; | |
503 | } | |
504 | down_read(¤t->mm->mmap_sem); | |
505 | err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, | |
506 | &cs->pg, NULL); | |
507 | up_read(¤t->mm->mmap_sem); | |
508 | if (err < 0) | |
509 | return err; | |
510 | BUG_ON(err != 1); | |
511 | offset = cs->addr % PAGE_SIZE; | |
512 | cs->mapaddr = kmap_atomic(cs->pg, KM_USER0); | |
513 | cs->buf = cs->mapaddr + offset; | |
514 | cs->len = min(PAGE_SIZE - offset, cs->seglen); | |
515 | cs->seglen -= cs->len; | |
516 | cs->addr += cs->len; | |
517 | ||
d7133114 | 518 | return lock_request(cs->fc, cs->req); |
334f485d MS |
519 | } |
520 | ||
521 | /* Do as much copy to/from userspace buffer as we can */ | |
8bfc016d | 522 | static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) |
334f485d MS |
523 | { |
524 | unsigned ncpy = min(*size, cs->len); | |
525 | if (val) { | |
526 | if (cs->write) | |
527 | memcpy(cs->buf, *val, ncpy); | |
528 | else | |
529 | memcpy(*val, cs->buf, ncpy); | |
530 | *val += ncpy; | |
531 | } | |
532 | *size -= ncpy; | |
533 | cs->len -= ncpy; | |
534 | cs->buf += ncpy; | |
535 | return ncpy; | |
536 | } | |
537 | ||
538 | /* | |
539 | * Copy a page in the request to/from the userspace buffer. Must be | |
540 | * done atomically | |
541 | */ | |
8bfc016d MS |
542 | static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, |
543 | unsigned offset, unsigned count, int zeroing) | |
334f485d MS |
544 | { |
545 | if (page && zeroing && count < PAGE_SIZE) { | |
546 | void *mapaddr = kmap_atomic(page, KM_USER1); | |
547 | memset(mapaddr, 0, PAGE_SIZE); | |
548 | kunmap_atomic(mapaddr, KM_USER1); | |
549 | } | |
550 | while (count) { | |
551 | int err; | |
552 | if (!cs->len && (err = fuse_copy_fill(cs))) | |
553 | return err; | |
554 | if (page) { | |
555 | void *mapaddr = kmap_atomic(page, KM_USER1); | |
556 | void *buf = mapaddr + offset; | |
557 | offset += fuse_copy_do(cs, &buf, &count); | |
558 | kunmap_atomic(mapaddr, KM_USER1); | |
559 | } else | |
560 | offset += fuse_copy_do(cs, NULL, &count); | |
561 | } | |
562 | if (page && !cs->write) | |
563 | flush_dcache_page(page); | |
564 | return 0; | |
565 | } | |
566 | ||
567 | /* Copy pages in the request to/from userspace buffer */ | |
568 | static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes, | |
569 | int zeroing) | |
570 | { | |
571 | unsigned i; | |
572 | struct fuse_req *req = cs->req; | |
573 | unsigned offset = req->page_offset; | |
574 | unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset); | |
575 | ||
576 | for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) { | |
577 | struct page *page = req->pages[i]; | |
578 | int err = fuse_copy_page(cs, page, offset, count, zeroing); | |
579 | if (err) | |
580 | return err; | |
581 | ||
582 | nbytes -= count; | |
583 | count = min(nbytes, (unsigned) PAGE_SIZE); | |
584 | offset = 0; | |
585 | } | |
586 | return 0; | |
587 | } | |
588 | ||
589 | /* Copy a single argument in the request to/from userspace buffer */ | |
590 | static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) | |
591 | { | |
592 | while (size) { | |
593 | int err; | |
594 | if (!cs->len && (err = fuse_copy_fill(cs))) | |
595 | return err; | |
596 | fuse_copy_do(cs, &val, &size); | |
597 | } | |
598 | return 0; | |
599 | } | |
600 | ||
601 | /* Copy request arguments to/from userspace buffer */ | |
602 | static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, | |
603 | unsigned argpages, struct fuse_arg *args, | |
604 | int zeroing) | |
605 | { | |
606 | int err = 0; | |
607 | unsigned i; | |
608 | ||
609 | for (i = 0; !err && i < numargs; i++) { | |
610 | struct fuse_arg *arg = &args[i]; | |
611 | if (i == numargs - 1 && argpages) | |
612 | err = fuse_copy_pages(cs, arg->size, zeroing); | |
613 | else | |
614 | err = fuse_copy_one(cs, arg->value, arg->size); | |
615 | } | |
616 | return err; | |
617 | } | |
618 | ||
a4d27e75 MS |
619 | static int request_pending(struct fuse_conn *fc) |
620 | { | |
621 | return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); | |
622 | } | |
623 | ||
334f485d MS |
624 | /* Wait until a request is available on the pending list */ |
625 | static void request_wait(struct fuse_conn *fc) | |
626 | { | |
627 | DECLARE_WAITQUEUE(wait, current); | |
628 | ||
629 | add_wait_queue_exclusive(&fc->waitq, &wait); | |
a4d27e75 | 630 | while (fc->connected && !request_pending(fc)) { |
334f485d MS |
631 | set_current_state(TASK_INTERRUPTIBLE); |
632 | if (signal_pending(current)) | |
633 | break; | |
634 | ||
d7133114 | 635 | spin_unlock(&fc->lock); |
334f485d | 636 | schedule(); |
d7133114 | 637 | spin_lock(&fc->lock); |
334f485d MS |
638 | } |
639 | set_current_state(TASK_RUNNING); | |
640 | remove_wait_queue(&fc->waitq, &wait); | |
641 | } | |
642 | ||
a4d27e75 MS |
643 | /* |
644 | * Transfer an interrupt request to userspace | |
645 | * | |
646 | * Unlike other requests this is assembled on demand, without a need | |
647 | * to allocate a separate fuse_req structure. | |
648 | * | |
649 | * Called with fc->lock held, releases it | |
650 | */ | |
651 | static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, | |
652 | const struct iovec *iov, unsigned long nr_segs) | |
105f4d7a | 653 | __releases(fc->lock) |
a4d27e75 MS |
654 | { |
655 | struct fuse_copy_state cs; | |
656 | struct fuse_in_header ih; | |
657 | struct fuse_interrupt_in arg; | |
658 | unsigned reqsize = sizeof(ih) + sizeof(arg); | |
659 | int err; | |
660 | ||
661 | list_del_init(&req->intr_entry); | |
662 | req->intr_unique = fuse_get_unique(fc); | |
663 | memset(&ih, 0, sizeof(ih)); | |
664 | memset(&arg, 0, sizeof(arg)); | |
665 | ih.len = reqsize; | |
666 | ih.opcode = FUSE_INTERRUPT; | |
667 | ih.unique = req->intr_unique; | |
668 | arg.unique = req->in.h.unique; | |
669 | ||
670 | spin_unlock(&fc->lock); | |
671 | if (iov_length(iov, nr_segs) < reqsize) | |
672 | return -EINVAL; | |
673 | ||
674 | fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs); | |
675 | err = fuse_copy_one(&cs, &ih, sizeof(ih)); | |
676 | if (!err) | |
677 | err = fuse_copy_one(&cs, &arg, sizeof(arg)); | |
678 | fuse_copy_finish(&cs); | |
679 | ||
680 | return err ? err : reqsize; | |
681 | } | |
682 | ||
334f485d MS |
683 | /* |
684 | * Read a single request into the userspace filesystem's buffer. This | |
685 | * function waits until a request is available, then removes it from | |
686 | * the pending list and copies request data to userspace buffer. If | |
f9a2842e MS |
687 | * no reply is needed (FORGET) or request has been aborted or there |
688 | * was an error during the copying then it's finished by calling | |
334f485d MS |
689 | * request_end(). Otherwise add it to the processing list, and set |
690 | * the 'sent' flag. | |
691 | */ | |
ee0b3e67 BP |
692 | static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, |
693 | unsigned long nr_segs, loff_t pos) | |
334f485d MS |
694 | { |
695 | int err; | |
334f485d MS |
696 | struct fuse_req *req; |
697 | struct fuse_in *in; | |
698 | struct fuse_copy_state cs; | |
699 | unsigned reqsize; | |
ee0b3e67 | 700 | struct file *file = iocb->ki_filp; |
0720b315 MS |
701 | struct fuse_conn *fc = fuse_get_conn(file); |
702 | if (!fc) | |
703 | return -EPERM; | |
334f485d | 704 | |
1d3d752b | 705 | restart: |
d7133114 | 706 | spin_lock(&fc->lock); |
e5ac1d1e JD |
707 | err = -EAGAIN; |
708 | if ((file->f_flags & O_NONBLOCK) && fc->connected && | |
a4d27e75 | 709 | !request_pending(fc)) |
e5ac1d1e JD |
710 | goto err_unlock; |
711 | ||
334f485d MS |
712 | request_wait(fc); |
713 | err = -ENODEV; | |
9ba7cbba | 714 | if (!fc->connected) |
334f485d MS |
715 | goto err_unlock; |
716 | err = -ERESTARTSYS; | |
a4d27e75 | 717 | if (!request_pending(fc)) |
334f485d MS |
718 | goto err_unlock; |
719 | ||
a4d27e75 MS |
720 | if (!list_empty(&fc->interrupts)) { |
721 | req = list_entry(fc->interrupts.next, struct fuse_req, | |
722 | intr_entry); | |
723 | return fuse_read_interrupt(fc, req, iov, nr_segs); | |
724 | } | |
725 | ||
334f485d | 726 | req = list_entry(fc->pending.next, struct fuse_req, list); |
83cfd493 | 727 | req->state = FUSE_REQ_READING; |
d77a1d5b | 728 | list_move(&req->list, &fc->io); |
334f485d MS |
729 | |
730 | in = &req->in; | |
1d3d752b MS |
731 | reqsize = in->h.len; |
732 | /* If request is too large, reply with an error and restart the read */ | |
733 | if (iov_length(iov, nr_segs) < reqsize) { | |
734 | req->out.h.error = -EIO; | |
735 | /* SETXATTR is special, since it may contain too large data */ | |
736 | if (in->h.opcode == FUSE_SETXATTR) | |
737 | req->out.h.error = -E2BIG; | |
738 | request_end(fc, req); | |
739 | goto restart; | |
334f485d | 740 | } |
d7133114 MS |
741 | spin_unlock(&fc->lock); |
742 | fuse_copy_init(&cs, fc, 1, req, iov, nr_segs); | |
1d3d752b MS |
743 | err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); |
744 | if (!err) | |
745 | err = fuse_copy_args(&cs, in->numargs, in->argpages, | |
746 | (struct fuse_arg *) in->args, 0); | |
334f485d | 747 | fuse_copy_finish(&cs); |
d7133114 | 748 | spin_lock(&fc->lock); |
334f485d | 749 | req->locked = 0; |
c9c9d7df MS |
750 | if (req->aborted) { |
751 | request_end(fc, req); | |
752 | return -ENODEV; | |
753 | } | |
334f485d | 754 | if (err) { |
c9c9d7df | 755 | req->out.h.error = -EIO; |
334f485d MS |
756 | request_end(fc, req); |
757 | return err; | |
758 | } | |
759 | if (!req->isreply) | |
760 | request_end(fc, req); | |
761 | else { | |
83cfd493 | 762 | req->state = FUSE_REQ_SENT; |
d77a1d5b | 763 | list_move_tail(&req->list, &fc->processing); |
a4d27e75 MS |
764 | if (req->interrupted) |
765 | queue_interrupt(fc, req); | |
d7133114 | 766 | spin_unlock(&fc->lock); |
334f485d MS |
767 | } |
768 | return reqsize; | |
769 | ||
770 | err_unlock: | |
d7133114 | 771 | spin_unlock(&fc->lock); |
334f485d MS |
772 | return err; |
773 | } | |
774 | ||
334f485d MS |
775 | /* Look up request on processing list by unique ID */ |
776 | static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) | |
777 | { | |
778 | struct list_head *entry; | |
779 | ||
780 | list_for_each(entry, &fc->processing) { | |
781 | struct fuse_req *req; | |
782 | req = list_entry(entry, struct fuse_req, list); | |
a4d27e75 | 783 | if (req->in.h.unique == unique || req->intr_unique == unique) |
334f485d MS |
784 | return req; |
785 | } | |
786 | return NULL; | |
787 | } | |
788 | ||
789 | static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out, | |
790 | unsigned nbytes) | |
791 | { | |
792 | unsigned reqsize = sizeof(struct fuse_out_header); | |
793 | ||
794 | if (out->h.error) | |
795 | return nbytes != reqsize ? -EINVAL : 0; | |
796 | ||
797 | reqsize += len_args(out->numargs, out->args); | |
798 | ||
799 | if (reqsize < nbytes || (reqsize > nbytes && !out->argvar)) | |
800 | return -EINVAL; | |
801 | else if (reqsize > nbytes) { | |
802 | struct fuse_arg *lastarg = &out->args[out->numargs-1]; | |
803 | unsigned diffsize = reqsize - nbytes; | |
804 | if (diffsize > lastarg->size) | |
805 | return -EINVAL; | |
806 | lastarg->size -= diffsize; | |
807 | } | |
808 | return fuse_copy_args(cs, out->numargs, out->argpages, out->args, | |
809 | out->page_zeroing); | |
810 | } | |
811 | ||
812 | /* | |
813 | * Write a single reply to a request. First the header is copied from | |
814 | * the write buffer. The request is then searched on the processing | |
815 | * list by the unique ID found in the header. If found, then remove | |
816 | * it from the list and copy the rest of the buffer to the request. | |
817 | * The request is finished by calling request_end() | |
818 | */ | |
ee0b3e67 BP |
819 | static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, |
820 | unsigned long nr_segs, loff_t pos) | |
334f485d MS |
821 | { |
822 | int err; | |
823 | unsigned nbytes = iov_length(iov, nr_segs); | |
824 | struct fuse_req *req; | |
825 | struct fuse_out_header oh; | |
826 | struct fuse_copy_state cs; | |
ee0b3e67 | 827 | struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp); |
334f485d | 828 | if (!fc) |
a87046d8 | 829 | return -EPERM; |
334f485d | 830 | |
d7133114 | 831 | fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs); |
334f485d MS |
832 | if (nbytes < sizeof(struct fuse_out_header)) |
833 | return -EINVAL; | |
834 | ||
835 | err = fuse_copy_one(&cs, &oh, sizeof(oh)); | |
836 | if (err) | |
837 | goto err_finish; | |
838 | err = -EINVAL; | |
839 | if (!oh.unique || oh.error <= -1000 || oh.error > 0 || | |
840 | oh.len != nbytes) | |
841 | goto err_finish; | |
842 | ||
d7133114 | 843 | spin_lock(&fc->lock); |
69a53bf2 MS |
844 | err = -ENOENT; |
845 | if (!fc->connected) | |
846 | goto err_unlock; | |
847 | ||
334f485d | 848 | req = request_find(fc, oh.unique); |
334f485d MS |
849 | if (!req) |
850 | goto err_unlock; | |
851 | ||
f9a2842e | 852 | if (req->aborted) { |
d7133114 | 853 | spin_unlock(&fc->lock); |
334f485d | 854 | fuse_copy_finish(&cs); |
d7133114 | 855 | spin_lock(&fc->lock); |
222f1d69 | 856 | request_end(fc, req); |
334f485d MS |
857 | return -ENOENT; |
858 | } | |
a4d27e75 MS |
859 | /* Is it an interrupt reply? */ |
860 | if (req->intr_unique == oh.unique) { | |
861 | err = -EINVAL; | |
862 | if (nbytes != sizeof(struct fuse_out_header)) | |
863 | goto err_unlock; | |
864 | ||
865 | if (oh.error == -ENOSYS) | |
866 | fc->no_interrupt = 1; | |
867 | else if (oh.error == -EAGAIN) | |
868 | queue_interrupt(fc, req); | |
869 | ||
870 | spin_unlock(&fc->lock); | |
871 | fuse_copy_finish(&cs); | |
872 | return nbytes; | |
873 | } | |
874 | ||
875 | req->state = FUSE_REQ_WRITING; | |
d77a1d5b | 876 | list_move(&req->list, &fc->io); |
334f485d MS |
877 | req->out.h = oh; |
878 | req->locked = 1; | |
879 | cs.req = req; | |
d7133114 | 880 | spin_unlock(&fc->lock); |
334f485d MS |
881 | |
882 | err = copy_out_args(&cs, &req->out, nbytes); | |
883 | fuse_copy_finish(&cs); | |
884 | ||
d7133114 | 885 | spin_lock(&fc->lock); |
334f485d MS |
886 | req->locked = 0; |
887 | if (!err) { | |
f9a2842e | 888 | if (req->aborted) |
334f485d | 889 | err = -ENOENT; |
f9a2842e | 890 | } else if (!req->aborted) |
334f485d MS |
891 | req->out.h.error = -EIO; |
892 | request_end(fc, req); | |
893 | ||
894 | return err ? err : nbytes; | |
895 | ||
896 | err_unlock: | |
d7133114 | 897 | spin_unlock(&fc->lock); |
334f485d MS |
898 | err_finish: |
899 | fuse_copy_finish(&cs); | |
900 | return err; | |
901 | } | |
902 | ||
334f485d MS |
903 | static unsigned fuse_dev_poll(struct file *file, poll_table *wait) |
904 | { | |
334f485d | 905 | unsigned mask = POLLOUT | POLLWRNORM; |
7025d9ad | 906 | struct fuse_conn *fc = fuse_get_conn(file); |
334f485d | 907 | if (!fc) |
7025d9ad | 908 | return POLLERR; |
334f485d MS |
909 | |
910 | poll_wait(file, &fc->waitq, wait); | |
911 | ||
d7133114 | 912 | spin_lock(&fc->lock); |
7025d9ad MS |
913 | if (!fc->connected) |
914 | mask = POLLERR; | |
a4d27e75 | 915 | else if (request_pending(fc)) |
7025d9ad | 916 | mask |= POLLIN | POLLRDNORM; |
d7133114 | 917 | spin_unlock(&fc->lock); |
334f485d MS |
918 | |
919 | return mask; | |
920 | } | |
921 | ||
69a53bf2 MS |
922 | /* |
923 | * Abort all requests on the given list (pending or processing) | |
924 | * | |
d7133114 | 925 | * This function releases and reacquires fc->lock |
69a53bf2 | 926 | */ |
334f485d MS |
927 | static void end_requests(struct fuse_conn *fc, struct list_head *head) |
928 | { | |
929 | while (!list_empty(head)) { | |
930 | struct fuse_req *req; | |
931 | req = list_entry(head->next, struct fuse_req, list); | |
334f485d MS |
932 | req->out.h.error = -ECONNABORTED; |
933 | request_end(fc, req); | |
d7133114 | 934 | spin_lock(&fc->lock); |
334f485d MS |
935 | } |
936 | } | |
937 | ||
69a53bf2 MS |
938 | /* |
939 | * Abort requests under I/O | |
940 | * | |
f9a2842e | 941 | * The requests are set to aborted and finished, and the request |
69a53bf2 MS |
942 | * waiter is woken up. This will make request_wait_answer() wait |
943 | * until the request is unlocked and then return. | |
64c6d8ed MS |
944 | * |
945 | * If the request is asynchronous, then the end function needs to be | |
946 | * called after waiting for the request to be unlocked (if it was | |
947 | * locked). | |
69a53bf2 MS |
948 | */ |
949 | static void end_io_requests(struct fuse_conn *fc) | |
950 | { | |
951 | while (!list_empty(&fc->io)) { | |
64c6d8ed MS |
952 | struct fuse_req *req = |
953 | list_entry(fc->io.next, struct fuse_req, list); | |
954 | void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; | |
955 | ||
f9a2842e | 956 | req->aborted = 1; |
69a53bf2 MS |
957 | req->out.h.error = -ECONNABORTED; |
958 | req->state = FUSE_REQ_FINISHED; | |
959 | list_del_init(&req->list); | |
960 | wake_up(&req->waitq); | |
64c6d8ed MS |
961 | if (end) { |
962 | req->end = NULL; | |
963 | /* The end function will consume this reference */ | |
964 | __fuse_get_request(req); | |
d7133114 | 965 | spin_unlock(&fc->lock); |
64c6d8ed MS |
966 | wait_event(req->waitq, !req->locked); |
967 | end(fc, req); | |
d7133114 | 968 | spin_lock(&fc->lock); |
64c6d8ed | 969 | } |
69a53bf2 MS |
970 | } |
971 | } | |
972 | ||
973 | /* | |
974 | * Abort all requests. | |
975 | * | |
976 | * Emergency exit in case of a malicious or accidental deadlock, or | |
977 | * just a hung filesystem. | |
978 | * | |
979 | * The same effect is usually achievable through killing the | |
980 | * filesystem daemon and all users of the filesystem. The exception | |
981 | * is the combination of an asynchronous request and the tricky | |
982 | * deadlock (see Documentation/filesystems/fuse.txt). | |
983 | * | |
984 | * During the aborting, progression of requests from the pending and | |
985 | * processing lists onto the io list, and progression of new requests | |
986 | * onto the pending list is prevented by fc->connected being false. |
987 | * | |
988 | * Progression of requests under I/O to the processing list is | |
f9a2842e MS |
989 | * prevented by the req->aborted flag being true for these requests. |
990 | * For this reason requests on the io list must be aborted first. | |
69a53bf2 MS |
991 | */ |
992 | void fuse_abort_conn(struct fuse_conn *fc) | |
993 | { | |
d7133114 | 994 | spin_lock(&fc->lock); |
69a53bf2 MS |
995 | if (fc->connected) { |
996 | fc->connected = 0; | |
51eb01e7 | 997 | fc->blocked = 0; |
69a53bf2 MS |
998 | end_io_requests(fc); |
999 | end_requests(fc, &fc->pending); | |
1000 | end_requests(fc, &fc->processing); | |
1001 | wake_up_all(&fc->waitq); | |
51eb01e7 | 1002 | wake_up_all(&fc->blocked_waitq); |
385a17bf | 1003 | kill_fasync(&fc->fasync, SIGIO, POLL_IN); |
69a53bf2 | 1004 | } |
d7133114 | 1005 | spin_unlock(&fc->lock); |
69a53bf2 MS |
1006 | } |
1007 | ||
334f485d MS |
1008 | static int fuse_dev_release(struct inode *inode, struct file *file) |
1009 | { | |
0720b315 | 1010 | struct fuse_conn *fc = fuse_get_conn(file); |
334f485d | 1011 | if (fc) { |
d7133114 | 1012 | spin_lock(&fc->lock); |
1e9a4ed9 | 1013 | fc->connected = 0; |
334f485d MS |
1014 | end_requests(fc, &fc->pending); |
1015 | end_requests(fc, &fc->processing); | |
d7133114 | 1016 | spin_unlock(&fc->lock); |
385a17bf | 1017 | fasync_helper(-1, file, 0, &fc->fasync); |
bafa9654 | 1018 | fuse_conn_put(fc); |
385a17bf | 1019 | } |
f543f253 | 1020 | |
334f485d MS |
1021 | return 0; |
1022 | } | |
1023 | ||
385a17bf JD |
1024 | static int fuse_dev_fasync(int fd, struct file *file, int on) |
1025 | { | |
1026 | struct fuse_conn *fc = fuse_get_conn(file); | |
1027 | if (!fc) | |
a87046d8 | 1028 | return -EPERM; |
385a17bf JD |
1029 | |
1030 | /* No locking - fasync_helper does its own locking */ | |
1031 | return fasync_helper(fd, file, on, &fc->fasync); | |
1032 | } | |
1033 | ||
4b6f5d20 | 1034 | const struct file_operations fuse_dev_operations = { |
334f485d MS |
1035 | .owner = THIS_MODULE, |
1036 | .llseek = no_llseek, | |
ee0b3e67 BP |
1037 | .read = do_sync_read, |
1038 | .aio_read = fuse_dev_read, | |
1039 | .write = do_sync_write, | |
1040 | .aio_write = fuse_dev_write, | |
334f485d MS |
1041 | .poll = fuse_dev_poll, |
1042 | .release = fuse_dev_release, | |
385a17bf | 1043 | .fasync = fuse_dev_fasync, |
334f485d MS |
1044 | }; |
1045 | ||
1046 | static struct miscdevice fuse_miscdevice = { | |
1047 | .minor = FUSE_MINOR, | |
1048 | .name = "fuse", | |
1049 | .fops = &fuse_dev_operations, | |
1050 | }; | |
1051 | ||
1052 | int __init fuse_dev_init(void) | |
1053 | { | |
1054 | int err = -ENOMEM; | |
1055 | fuse_req_cachep = kmem_cache_create("fuse_request", | |
1056 | sizeof(struct fuse_req), | |
20c2df83 | 1057 | 0, 0, NULL); |
334f485d MS |
1058 | if (!fuse_req_cachep) |
1059 | goto out; | |
1060 | ||
1061 | err = misc_register(&fuse_miscdevice); | |
1062 | if (err) | |
1063 | goto out_cache_clean; | |
1064 | ||
1065 | return 0; | |
1066 | ||
1067 | out_cache_clean: | |
1068 | kmem_cache_destroy(fuse_req_cachep); | |
1069 | out: | |
1070 | return err; | |
1071 | } | |
1072 | ||
1073 | void fuse_dev_cleanup(void) | |
1074 | { | |
1075 | misc_deregister(&fuse_miscdevice); | |
1076 | kmem_cache_destroy(fuse_req_cachep); | |
1077 | } |