knfsd: clean up nfsd filesystem interfaces
[linux-2.6-block.git] / fs / nfsd / nfssvc.c
CommitLineData
1da177e4
LT
1/*
2 * linux/fs/nfsd/nfssvc.c
3 *
4 * Central processing for nfsd.
5 *
6 * Authors: Olaf Kirch (okir@monad.swb.de)
7 *
8 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
9 */
10
1da177e4 11#include <linux/module.h>
e8edc6e0 12#include <linux/sched.h>
1da177e4
LT
13#include <linux/time.h>
14#include <linux/errno.h>
15#include <linux/nfs.h>
16#include <linux/in.h>
17#include <linux/uio.h>
18#include <linux/unistd.h>
19#include <linux/slab.h>
20#include <linux/smp.h>
21#include <linux/smp_lock.h>
83144186 22#include <linux/freezer.h>
1da177e4
LT
23#include <linux/fs_struct.h>
24
25#include <linux/sunrpc/types.h>
26#include <linux/sunrpc/stats.h>
27#include <linux/sunrpc/svc.h>
28#include <linux/sunrpc/svcsock.h>
29#include <linux/sunrpc/cache.h>
30#include <linux/nfsd/nfsd.h>
31#include <linux/nfsd/stats.h>
32#include <linux/nfsd/cache.h>
70c3b76c 33#include <linux/nfsd/syscall.h>
1da177e4 34#include <linux/lockd/bind.h>
a257cdd0 35#include <linux/nfsacl.h>
1da177e4
LT
36
37#define NFSDDBG_FACILITY NFSDDBG_SVC
38
39/* these signals will be delivered to an nfsd thread
40 * when handling a request
41 */
42#define ALLOWED_SIGS (sigmask(SIGKILL))
43/* these signals will be delivered to an nfsd thread
44 * when not handling a request. i.e. when waiting
45 */
46#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
47/* if the last thread dies with SIGHUP, then the exports table is
48 * left unchanged ( like 2.4-{0-9} ). Any other signal will clear
49 * the exports table (like 2.2).
50 */
51#define SIG_NOCLEAN SIGHUP
52
53extern struct svc_program nfsd_program;
54static void nfsd(struct svc_rqst *rqstp);
55struct timeval nfssvc_boot;
1da177e4
LT
56static atomic_t nfsd_busy;
57static unsigned long nfsd_last_call;
58static DEFINE_SPINLOCK(nfsd_call_lock);
59
bedbdd8b
NB
60/*
61 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
62 * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
63 * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
64 *
65 * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
66 * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
67 * of nfsd threads must exist and each must be listed in ->sp_all_threads in each
68 * entry of ->sv_pools[].
69 *
70 * Transitions of the thread count between zero and non-zero are of particular
71 * interest since the svc_serv needs to be created and initialized at that
72 * point, or freed.
3dd98a3b
JL
73 *
74 * Finally, the nfsd_mutex also protects some of the global variables that are
75 * accessed when nfsd starts and that are settable via the write_* routines in
76 * nfsctl.c. In particular:
77 *
78 * user_recovery_dirname
79 * user_lease_time
80 * nfsd_versions
bedbdd8b
NB
81 */
82DEFINE_MUTEX(nfsd_mutex);
83struct svc_serv *nfsd_serv;
84
3fb803a9
AG
85#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
86static struct svc_stat nfsd_acl_svcstats;
/*
 * Master table of NFSACL protocol versions, indexed by version number.
 * nfsd_vers() and nfsd_reset_versions() copy entries from here into
 * nfsd_acl_versions[], which is the table nfsd_acl_program points at.
 */
87static struct svc_version * nfsd_acl_version[] = {
88 [2] = &nfsd_acl_version2,
89 [3] = &nfsd_acl_version3,
90};
91
92#define NFSD_ACL_MINVERS 2
e8c96f8c 93#define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version)
3fb803a9
AG
94static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
95
/*
 * RPC program descriptor for the NFSACL side-band protocol.  It is chained
 * behind nfsd_program via ->pg_next and shares the "nfsd" class with it.
 */
96static struct svc_program nfsd_acl_program = {
97 .pg_prog = NFS_ACL_PROGRAM,
98 .pg_nvers = NFSD_ACL_NRVERS,
99 .pg_vers = nfsd_acl_versions,
1a8eff6d 100 .pg_name = "nfsacl",
3fb803a9
AG
101 .pg_class = "nfsd",
102 .pg_stats = &nfsd_acl_svcstats,
103 .pg_authenticate = &svc_set_client,
104};
105
/* Statistics block for the NFSACL program; points back at its program. */
106static struct svc_stat nfsd_acl_svcstats = {
107 .program = &nfsd_acl_program,
108};
109#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
110
70c3b76c
N
/*
 * Master table of NFS protocol versions compiled into this kernel, indexed
 * by version number.  Versions that are configured out leave a NULL slot.
 * The table actually served is nfsd_versions[], filled from this one.
 */
111static struct svc_version * nfsd_version[] = {
112 [2] = &nfsd_version2,
113#if defined(CONFIG_NFSD_V3)
114 [3] = &nfsd_version3,
115#endif
116#if defined(CONFIG_NFSD_V4)
117 [4] = &nfsd_version4,
118#endif
119};
120
121#define NFSD_MINVERS 2
e8c96f8c 122#define NFSD_NRVERS ARRAY_SIZE(nfsd_version)
70c3b76c
N
123static struct svc_version *nfsd_versions[NFSD_NRVERS];
124
/*
 * RPC program descriptor for NFS itself.  When ACL support is configured,
 * the NFSACL program is chained behind it through ->pg_next.
 */
125struct svc_program nfsd_program = {
3fb803a9
AG
126#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
127 .pg_next = &nfsd_acl_program,
128#endif
70c3b76c
N
129 .pg_prog = NFS_PROGRAM, /* program number */
130 .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
131 .pg_vers = nfsd_versions, /* version table */
132 .pg_name = "nfsd", /* program name */
133 .pg_class = "nfsd", /* authentication class */
134 .pg_stats = &nfsd_svcstats, /* RPC statistics */
135 .pg_authenticate = &svc_set_client, /* export authentication */
136
137};
138
6658d3a7
N
139int nfsd_vers(int vers, enum vers_op change)
140{
141 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
142 return -1;
143 switch(change) {
144 case NFSD_SET:
145 nfsd_versions[vers] = nfsd_version[vers];
6658d3a7
N
146#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
147 if (vers < NFSD_ACL_NRVERS)
1a8eff6d 148 nfsd_acl_versions[vers] = nfsd_acl_version[vers];
6658d3a7 149#endif
1a8eff6d 150 break;
6658d3a7
N
151 case NFSD_CLEAR:
152 nfsd_versions[vers] = NULL;
153#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
154 if (vers < NFSD_ACL_NRVERS)
1a8eff6d 155 nfsd_acl_versions[vers] = NULL;
6658d3a7
N
156#endif
157 break;
158 case NFSD_TEST:
159 return nfsd_versions[vers] != NULL;
160 case NFSD_AVAIL:
161 return nfsd_version[vers] != NULL;
162 }
163 return 0;
164}
1da177e4
LT
165/*
166 * Maximum number of nfsd processes
167 */
168#define NFSD_MAXSERVS 8192
169
170int nfsd_nrthreads(void)
171{
172 if (nfsd_serv == NULL)
173 return 0;
174 else
175 return nfsd_serv->sv_nrthreads;
176}
177
bc591ccf
N
178static int killsig; /* signal that was used to kill last nfsd */
179static void nfsd_last_thread(struct svc_serv *serv)
180{
181 /* When last nfsd thread exits we need to do some clean-up */
7a182083
TT
182 struct svc_xprt *xprt;
183 list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
24e36663 184 lockd_down();
bc591ccf
N
185 nfsd_serv = NULL;
186 nfsd_racache_shutdown();
187 nfs4_state_shutdown();
188
189 printk(KERN_WARNING "nfsd: last server has exited\n");
190 if (killsig != SIG_NOCLEAN) {
191 printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
192 nfsd_export_flush();
193 }
194}
6658d3a7
N
195
196void nfsd_reset_versions(void)
197{
198 int found_one = 0;
199 int i;
200
201 for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
202 if (nfsd_program.pg_vers[i])
203 found_one = 1;
204 }
205
206 if (!found_one) {
207 for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
208 nfsd_program.pg_vers[i] = nfsd_version[i];
209#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
210 for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
211 nfsd_acl_program.pg_vers[i] =
212 nfsd_acl_version[i];
213#endif
214 }
215}
216
bedbdd8b 217
b41b66d6 218int nfsd_create_serv(void)
02a375f0
N
219{
220 int err = 0;
bedbdd8b
NB
221
222 WARN_ON(!mutex_is_locked(&nfsd_mutex));
02a375f0 223 if (nfsd_serv) {
9a24ab57 224 svc_get(nfsd_serv);
02a375f0
N
225 return 0;
226 }
596bbe53
N
227 if (nfsd_max_blksize == 0) {
228 /* choose a suitable default */
229 struct sysinfo i;
230 si_meminfo(&i);
231 /* Aim for 1/4096 of memory per thread
232 * This gives 1MB on 4Gig machines
233 * But only uses 32K on 128M machines.
234 * Bottom out at 8K on 32M and smaller.
235 * Of course, this is only a default.
236 */
237 nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
44c55600 238 i.totalram <<= PAGE_SHIFT - 12;
596bbe53
N
239 while (nfsd_max_blksize > i.totalram &&
240 nfsd_max_blksize >= 8*1024*2)
241 nfsd_max_blksize /= 2;
242 }
02a375f0
N
243
244 atomic_set(&nfsd_busy, 0);
596bbe53 245 nfsd_serv = svc_create_pooled(&nfsd_program,
c6b0a9f8 246 nfsd_max_blksize,
eec09661
GB
247 nfsd_last_thread,
248 nfsd, SIG_NOCLEAN, THIS_MODULE);
02a375f0
N
249 if (nfsd_serv == NULL)
250 err = -ENOMEM;
bedbdd8b 251
02a375f0
N
252 do_gettimeofday(&nfssvc_boot); /* record boot time */
253 return err;
254}
255
256static int nfsd_init_socks(int port)
257{
258 int error;
259 if (!list_empty(&nfsd_serv->sv_permsocks))
260 return 0;
261
02a375f0 262 error = lockd_up(IPPROTO_UDP);
4a3ae42d 263 if (error >= 0) {
d7c9f1ed 264 error = svc_create_xprt(nfsd_serv, "udp", port,
482fb94e 265 SVC_SOCK_DEFAULTS);
4a3ae42d
N
266 if (error < 0)
267 lockd_down();
268 }
02a375f0
N
269 if (error < 0)
270 return error;
271
02a375f0 272 error = lockd_up(IPPROTO_TCP);
4a3ae42d 273 if (error >= 0) {
d7c9f1ed 274 error = svc_create_xprt(nfsd_serv, "tcp", port,
482fb94e 275 SVC_SOCK_DEFAULTS);
4a3ae42d
N
276 if (error < 0)
277 lockd_down();
278 }
02a375f0
N
279 if (error < 0)
280 return error;
02a375f0
N
281 return 0;
282}
283
eed2965a
GB
284int nfsd_nrpools(void)
285{
286 if (nfsd_serv == NULL)
287 return 0;
288 else
289 return nfsd_serv->sv_nrpools;
290}
291
292int nfsd_get_nrthreads(int n, int *nthreads)
293{
294 int i = 0;
295
296 if (nfsd_serv != NULL) {
297 for (i = 0; i < nfsd_serv->sv_nrpools && i < n; i++)
298 nthreads[i] = nfsd_serv->sv_pools[i].sp_nrthreads;
299 }
300
301 return 0;
302}
303
304int nfsd_set_nrthreads(int n, int *nthreads)
305{
306 int i = 0;
307 int tot = 0;
308 int err = 0;
309
bedbdd8b
NB
310 WARN_ON(!mutex_is_locked(&nfsd_mutex));
311
eed2965a
GB
312 if (nfsd_serv == NULL || n <= 0)
313 return 0;
314
315 if (n > nfsd_serv->sv_nrpools)
316 n = nfsd_serv->sv_nrpools;
317
318 /* enforce a global maximum number of threads */
319 tot = 0;
320 for (i = 0; i < n; i++) {
321 if (nthreads[i] > NFSD_MAXSERVS)
322 nthreads[i] = NFSD_MAXSERVS;
323 tot += nthreads[i];
324 }
325 if (tot > NFSD_MAXSERVS) {
326 /* total too large: scale down requested numbers */
327 for (i = 0; i < n && tot > 0; i++) {
328 int new = nthreads[i] * NFSD_MAXSERVS / tot;
329 tot -= (nthreads[i] - new);
330 nthreads[i] = new;
331 }
332 for (i = 0; i < n && tot > 0; i++) {
333 nthreads[i]--;
334 tot--;
335 }
336 }
337
338 /*
339 * There must always be a thread in pool 0; the admin
340 * can't shut down NFS completely using pool_threads.
341 */
342 if (nthreads[0] == 0)
343 nthreads[0] = 1;
344
345 /* apply the new numbers */
eed2965a
GB
346 svc_get(nfsd_serv);
347 for (i = 0; i < n; i++) {
348 err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
349 nthreads[i]);
350 if (err)
351 break;
352 }
353 svc_destroy(nfsd_serv);
eed2965a
GB
354
355 return err;
356}
357
1da177e4
LT
358int
359nfsd_svc(unsigned short port, int nrservs)
360{
361 int error;
bedbdd8b
NB
362
363 mutex_lock(&nfsd_mutex);
6658d3a7 364 dprintk("nfsd: creating service\n");
1da177e4
LT
365 error = -EINVAL;
366 if (nrservs <= 0)
367 nrservs = 0;
368 if (nrservs > NFSD_MAXSERVS)
369 nrservs = NFSD_MAXSERVS;
370
371 /* Readahead param cache - will no-op if it already exists */
372 error = nfsd_racache_init(2*nrservs);
373 if (error<0)
374 goto out;
e8ff2a84 375 nfs4_state_start();
02a375f0
N
376
377 nfsd_reset_versions();
378
379 error = nfsd_create_serv();
380
381 if (error)
382 goto out;
383 error = nfsd_init_socks(port);
384 if (error)
385 goto failure;
386
eec09661 387 error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
1da177e4 388 failure:
1da177e4 389 svc_destroy(nfsd_serv); /* Release server */
1da177e4 390 out:
bedbdd8b 391 mutex_unlock(&nfsd_mutex);
1da177e4
LT
392 return error;
393}
394
395static inline void
396update_thread_usage(int busy_threads)
397{
398 unsigned long prev_call;
399 unsigned long diff;
400 int decile;
401
402 spin_lock(&nfsd_call_lock);
403 prev_call = nfsd_last_call;
404 nfsd_last_call = jiffies;
405 decile = busy_threads*10/nfsdstats.th_cnt;
406 if (decile>0 && decile <= 10) {
407 diff = nfsd_last_call - prev_call;
408 if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP)
409 nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP;
410 if (decile == 10)
411 nfsdstats.th_fullcnt++;
412 }
413 spin_unlock(&nfsd_call_lock);
414}
415
416/*
417 * This is the NFS server kernel thread
418 */
419static void
420nfsd(struct svc_rqst *rqstp)
421{
1da177e4
LT
422 struct fs_struct *fsp;
423 int err;
1da177e4
LT
424 sigset_t shutdown_mask, allowed_mask;
425
426 /* Lock module and set up kernel thread */
bedbdd8b 427 mutex_lock(&nfsd_mutex);
1da177e4
LT
428 daemonize("nfsd");
429
430 /* After daemonize() this kernel thread shares current->fs
431 * with the init process. We need to create files with a
432 * umask of 0 instead of init's umask. */
433 fsp = copy_fs_struct(current->fs);
434 if (!fsp) {
435 printk("Unable to start nfsd thread: out of memory\n");
436 goto out;
437 }
438 exit_fs(current);
439 current->fs = fsp;
440 current->fs->umask = 0;
441
442 siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
443 siginitsetinv(&allowed_mask, ALLOWED_SIGS);
444
bedbdd8b 445
1da177e4
LT
446 nfsdstats.th_cnt++;
447
eec09661 448 rqstp->rq_task = current;
1da177e4 449
bedbdd8b
NB
450 mutex_unlock(&nfsd_mutex);
451
1da177e4
LT
452
453 /*
454 * We want less throttling in balance_dirty_pages() so that nfs to
455 * localhost doesn't cause nfsd to lock up due to all the client's
456 * dirty pages.
457 */
458 current->flags |= PF_LESS_THROTTLE;
83144186 459 set_freezable();
1da177e4
LT
460
461 /*
462 * The main request loop
463 */
464 for (;;) {
465 /* Block all but the shutdown signals */
466 sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
467
468 /*
469 * Find a socket with data available and call its
470 * recvfrom routine.
471 */
6fb2b47f 472 while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
1da177e4
LT
473 ;
474 if (err < 0)
475 break;
476 update_thread_usage(atomic_read(&nfsd_busy));
477 atomic_inc(&nfsd_busy);
478
479 /* Lock the export hash tables for reading. */
480 exp_readlock();
481
482 /* Process request with signals blocked. */
483 sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
484
6fb2b47f 485 svc_process(rqstp);
1da177e4
LT
486
487 /* Unlock export hash tables */
488 exp_readunlock();
489 update_thread_usage(atomic_read(&nfsd_busy));
490 atomic_dec(&nfsd_busy);
491 }
492
493 if (err != -EINTR) {
494 printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
495 } else {
496 unsigned int signo;
497
498 for (signo = 1; signo <= _NSIG; signo++)
499 if (sigismember(&current->pending.signal, signo) &&
500 !sigismember(&current->blocked, signo))
501 break;
bc591ccf 502 killsig = signo;
1da177e4 503 }
24e36663 504 /* Clear signals before calling svc_exit_thread() */
9e416052 505 flush_signals(current);
1da177e4 506
bedbdd8b 507 mutex_lock(&nfsd_mutex);
1da177e4 508
1da177e4
LT
509 nfsdstats.th_cnt --;
510
511out:
512 /* Release the thread */
513 svc_exit_thread(rqstp);
514
515 /* Release module */
bedbdd8b 516 mutex_unlock(&nfsd_mutex);
1da177e4
LT
517 module_put_and_exit(0);
518}
519
32c1eb0c
AA
520static __be32 map_new_errors(u32 vers, __be32 nfserr)
521{
522 if (nfserr == nfserr_jukebox && vers == 2)
523 return nfserr_dropit;
524 if (nfserr == nfserr_wrongsec && vers < 4)
525 return nfserr_acces;
526 return nfserr;
527}
528
1da177e4 529int
c7afef1f 530nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
1da177e4
LT
531{
532 struct svc_procedure *proc;
533 kxdrproc_t xdr;
ad451d38
AV
534 __be32 nfserr;
535 __be32 *nfserrp;
1da177e4
LT
536
537 dprintk("nfsd_dispatch: vers %d proc %d\n",
538 rqstp->rq_vers, rqstp->rq_proc);
539 proc = rqstp->rq_procinfo;
540
541 /* Check whether we have this call in the cache. */
542 switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) {
543 case RC_INTR:
544 case RC_DROPIT:
545 return 0;
546 case RC_REPLY:
547 return 1;
548 case RC_DOIT:;
549 /* do it */
550 }
551
552 /* Decode arguments */
553 xdr = proc->pc_decode;
ad451d38 554 if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
1da177e4
LT
555 rqstp->rq_argp)) {
556 dprintk("nfsd: failed to decode arguments!\n");
557 nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
558 *statp = rpc_garbage_args;
559 return 1;
560 }
561
562 /* need to grab the location to store the status, as
563 * nfsv4 does some encoding while processing
564 */
565 nfserrp = rqstp->rq_res.head[0].iov_base
566 + rqstp->rq_res.head[0].iov_len;
ad451d38 567 rqstp->rq_res.head[0].iov_len += sizeof(__be32);
1da177e4
LT
568
569 /* Now call the procedure handler, and encode NFS status. */
570 nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
32c1eb0c 571 nfserr = map_new_errors(rqstp->rq_vers, nfserr);
1da177e4 572 if (nfserr == nfserr_dropit) {
45457e09 573 dprintk("nfsd: Dropping request; may be revisited later\n");
1da177e4
LT
574 nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
575 return 0;
576 }
577
578 if (rqstp->rq_proc != 0)
579 *nfserrp++ = nfserr;
580
581 /* Encode result.
582 * For NFSv2, additional info is never returned in case of an error.
583 */
584 if (!(nfserr && rqstp->rq_vers == 2)) {
585 xdr = proc->pc_encode;
586 if (xdr && !xdr(rqstp, nfserrp,
587 rqstp->rq_resp)) {
588 /* Failed to encode result. Release cache entry */
589 dprintk("nfsd: failed to encode result!\n");
590 nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
591 *statp = rpc_system_err;
592 return 1;
593 }
594 }
595
596 /* Store reply in cache. */
597 nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
598 return 1;
599}