net: sock: tracing: Fix sock_exceed_buf_limit not to dereference stale pointer
[linux-block.git] / kernel / sysctl.c
CommitLineData
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
1da177e4
LT
2/*
3 * sysctl.c: General linux system control interface
4 *
5 * Begun 24 March 1995, Stephen Tweedie
6 * Added /proc support, Dec 1995
7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10 * Dynamic registration fixes, Stephen Tweedie.
11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13 * Horn.
14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17 * Wendling.
18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
19 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
20 */
21
1da177e4
LT
22#include <linux/module.h>
23#include <linux/mm.h>
24#include <linux/swap.h>
25#include <linux/slab.h>
26#include <linux/sysctl.h>
5a04cca6 27#include <linux/bitmap.h>
d33ed52d 28#include <linux/signal.h>
f39650de 29#include <linux/panic.h>
455cd5ab 30#include <linux/printk.h>
1da177e4 31#include <linux/proc_fs.h>
72c2d582 32#include <linux/security.h>
1da177e4 33#include <linux/ctype.h>
fd4b616b 34#include <linux/kmemleak.h>
b6459415 35#include <linux/filter.h>
62239ac2 36#include <linux/fs.h>
1da177e4
LT
37#include <linux/init.h>
38#include <linux/kernel.h>
0296b228 39#include <linux/kobject.h>
20380731 40#include <linux/net.h>
1da177e4
LT
41#include <linux/sysrq.h>
42#include <linux/highuid.h>
43#include <linux/writeback.h>
3fff4c42 44#include <linux/ratelimit.h>
76ab0f53 45#include <linux/compaction.h>
1da177e4 46#include <linux/hugetlb.h>
1da177e4 47#include <linux/initrd.h>
0b77f5bf 48#include <linux/key.h>
1da177e4
LT
49#include <linux/times.h>
50#include <linux/limits.h>
51#include <linux/dcache.h>
52#include <linux/syscalls.h>
c748e134 53#include <linux/vmstat.h>
c255d844
PM
54#include <linux/nfs_fs.h>
55#include <linux/acpi.h>
10a0a8d4 56#include <linux/reboot.h>
b0fc494f 57#include <linux/ftrace.h>
cdd6c482 58#include <linux/perf_event.h>
8e4228e1 59#include <linux/oom.h>
17f60a7d 60#include <linux/kmod.h>
73efc039 61#include <linux/capability.h>
40401530 62#include <linux/binfmts.h>
cf4aebc2 63#include <linux/sched/sysctl.h>
d2921684 64#include <linux/mount.h>
cefdca0a 65#include <linux/userfaultfd_k.h>
2374c09b 66#include <linux/pid.h>
1da177e4 67
7f2923c4
CB
68#include "../lib/kstrtox.h"
69
7c0f6ba6 70#include <linux/uaccess.h>
1da177e4
LT
71#include <asm/processor.h>
72
29cbc78b
AK
73#ifdef CONFIG_X86
74#include <asm/nmi.h>
0741f4d2 75#include <asm/stacktrace.h>
6e7c4025 76#include <asm/io.h>
29cbc78b 77#endif
d550bbd4
DH
78#ifdef CONFIG_SPARC
79#include <asm/setup.h>
80#endif
4f0e056f
DY
81#ifdef CONFIG_RT_MUTEXES
82#include <linux/rtmutex.h>
83#endif
504d7cf1 84
1da177e4
LT
85#if defined(CONFIG_SYSCTL)
86
c4f3b63f 87/* Constants used for minimum and maximum */
c4f3b63f 88
c5dfd78e 89#ifdef CONFIG_PERF_EVENTS
d73840ec 90static const int six_hundred_forty_kb = 640 * 1024;
c5dfd78e 91#endif
c4f3b63f 92
9e4a5bda 93
f628867d 94static const int ngroups_max = NGROUPS_MAX;
73efc039 95static const int cap_last_cap = CAP_LAST_CAP;
1da177e4 96
d6f8ff73 97#ifdef CONFIG_PROC_SYSCTL
f4aacea2 98
a19ac337
LR
99/**
100 * enum sysctl_writes_mode - supported sysctl write modes
101 *
102 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
65f50f25
WH
103 * to be written, and multiple writes on the same sysctl file descriptor
104 * will rewrite the sysctl value, regardless of file position. No warning
105 * is issued when the initial position is not 0.
a19ac337 106 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
65f50f25 107 * not 0.
a19ac337 108 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
65f50f25
WH
109 * file position 0 and the value must be fully contained in the buffer
110 * sent to the write syscall. If dealing with strings respect the file
111 * position, but restrict this to the max length of the buffer, anything
112 * past the max length will be ignored. Multiple writes will append
113 * to the buffer.
a19ac337
LR
114 *
115 * These write modes control how current file position affects the behavior of
116 * updating sysctl values through the proc interface on each write.
117 */
118enum sysctl_writes_mode {
119 SYSCTL_WRITES_LEGACY = -1,
120 SYSCTL_WRITES_WARN = 0,
121 SYSCTL_WRITES_STRICT = 1,
122};
f4aacea2 123
a19ac337 124static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
f461d2dc 125#endif /* CONFIG_PROC_SYSCTL */
ceb18132 126
67f3977f
AG
127#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
128 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
1da177e4
LT
129int sysctl_legacy_va_layout;
130#endif
131
5e771905 132#ifdef CONFIG_COMPACTION
2452dcb9 133/* min_extfrag_threshold is SYSCTL_ZERO */;
d73840ec 134static const int max_extfrag_threshold = 1000;
5e771905
MG
135#endif
136
f461d2dc
CH
137#endif /* CONFIG_SYSCTL */
138
139/*
140 * /proc/sys support
141 */
142
b89a8171 143#ifdef CONFIG_PROC_SYSCTL
1da177e4 144
f8808300 145static int _proc_do_string(char *data, int maxlen, int write,
32927393 146 char *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
147{
148 size_t len;
32927393 149 char c, *p;
8d060877
ON
150
151 if (!data || !maxlen || !*lenp) {
1da177e4
LT
152 *lenp = 0;
153 return 0;
154 }
8d060877 155
1da177e4 156 if (write) {
f4aacea2
KC
157 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
158 /* Only continue writes not past the end of buffer. */
159 len = strlen(data);
160 if (len > maxlen - 1)
161 len = maxlen - 1;
162
163 if (*ppos > len)
164 return 0;
165 len = *ppos;
166 } else {
167 /* Start writing from beginning of buffer. */
168 len = 0;
169 }
170
2ca9bb45 171 *ppos += *lenp;
1da177e4 172 p = buffer;
2ca9bb45 173 while ((p - buffer) < *lenp && len < maxlen - 1) {
32927393 174 c = *(p++);
1da177e4
LT
175 if (c == 0 || c == '\n')
176 break;
2ca9bb45 177 data[len++] = c;
1da177e4 178 }
f8808300 179 data[len] = 0;
1da177e4 180 } else {
f5dd3d6f
SV
181 len = strlen(data);
182 if (len > maxlen)
183 len = maxlen;
8d060877
ON
184
185 if (*ppos > len) {
186 *lenp = 0;
187 return 0;
188 }
189
190 data += *ppos;
191 len -= *ppos;
192
1da177e4
LT
193 if (len > *lenp)
194 len = *lenp;
195 if (len)
32927393 196 memcpy(buffer, data, len);
1da177e4 197 if (len < *lenp) {
32927393 198 buffer[len] = '\n';
1da177e4
LT
199 len++;
200 }
201 *lenp = len;
202 *ppos += len;
203 }
204 return 0;
205}
206
f4aacea2
KC
207static void warn_sysctl_write(struct ctl_table *table)
208{
209 pr_warn_once("%s wrote to %s when file position was not 0!\n"
210 "This will not be supported in the future. To silence this\n"
211 "warning, set kernel.sysctl_writes_strict = -1\n",
212 current->comm, table->procname);
213}
214
d383d484 215/**
5f733e8a 216 * proc_first_pos_non_zero_ignore - check if first position is allowed
d383d484
LR
217 * @ppos: file position
218 * @table: the sysctl table
219 *
220 * Returns true if the first position is non-zero and the sysctl_writes_strict
221 * mode indicates this is not allowed for numeric input types. String proc
5f733e8a 222 * handlers can ignore the return value.
d383d484
LR
223 */
224static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
225 struct ctl_table *table)
226{
227 if (!*ppos)
228 return false;
229
230 switch (sysctl_writes_strict) {
231 case SYSCTL_WRITES_STRICT:
232 return true;
233 case SYSCTL_WRITES_WARN:
234 warn_sysctl_write(table);
235 return false;
236 default:
237 return false;
238 }
239}
240
f5dd3d6f
SV
241/**
242 * proc_dostring - read a string sysctl
243 * @table: the sysctl table
244 * @write: %TRUE if this is a write to the sysctl file
f5dd3d6f
SV
245 * @buffer: the user buffer
246 * @lenp: the size of the user buffer
247 * @ppos: file position
248 *
249 * Reads/writes a string from/to the user buffer. If the kernel
250 * buffer provided is not large enough to hold the string, the
251 * string is truncated. The copied string is %NULL-terminated.
252 * If the string is being read by the user process, it is copied
253 * and a newline '\n' is added. It is truncated if the buffer is
254 * not large enough.
255 *
256 * Returns 0 on success.
257 */
8d65af78 258int proc_dostring(struct ctl_table *table, int write,
32927393 259 void *buffer, size_t *lenp, loff_t *ppos)
f5dd3d6f 260{
d383d484
LR
261 if (write)
262 proc_first_pos_non_zero_ignore(ppos, table);
f4aacea2 263
32927393
CH
264 return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
265 ppos);
f5dd3d6f
SV
266}
267
00b7c339
AW
/*
 * Advance *@buf past any leading whitespace (as determined by
 * skip_spaces()) and return how many characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
276
9f977fb7
OP
/*
 * Step *@buf over a run of the character @v, consuming at most *@size
 * bytes; *@size is decremented once for every byte skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; (*buf)++)
		(*size)--;
}
286
7f2923c4
CB
287/**
288 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
289 * fail on overflow
290 *
291 * @cp: kernel buffer containing the string to parse
292 * @endp: pointer to store the trailing characters
293 * @base: the base to use
294 * @res: where the parsed integer will be stored
295 *
296 * In case of success 0 is returned and @res will contain the parsed integer,
297 * @endp will hold any trailing characters.
298 * This function will fail the parse on overflow. If there wasn't an overflow
299 * the function will defer the decision what characters count as invalid to the
300 * caller.
301 */
302static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
303 unsigned long *res)
304{
305 unsigned long long result;
306 unsigned int rv;
307
308 cp = _parse_integer_fixup_radix(cp, &base);
309 rv = _parse_integer(cp, base, &result);
310 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
311 return -ERANGE;
312
313 cp += rv;
314
315 if (endp)
316 *endp = (char *)cp;
317
318 *res = (unsigned long)result;
319 return 0;
320}
321
00b7c339
AW
322#define TMPBUFLEN 22
323/**
0fc377bd 324 * proc_get_long - reads an ASCII formatted integer from a user buffer
00b7c339 325 *
0fc377bd
RD
326 * @buf: a kernel buffer
327 * @size: size of the kernel buffer
328 * @val: this is where the number will be stored
329 * @neg: set to %TRUE if number is negative
330 * @perm_tr: a vector which contains the allowed trailers
331 * @perm_tr_len: size of the perm_tr vector
332 * @tr: pointer to store the trailer character
00b7c339 333 *
0fc377bd
RD
334 * In case of success %0 is returned and @buf and @size are updated with
335 * the amount of bytes read. If @tr is non-NULL and a trailing
336 * character exists (size is non-zero after returning from this
337 * function), @tr is updated with the trailing character.
00b7c339
AW
338 */
339static int proc_get_long(char **buf, size_t *size,
340 unsigned long *val, bool *neg,
341 const char *perm_tr, unsigned perm_tr_len, char *tr)
342{
343 int len;
344 char *p, tmp[TMPBUFLEN];
345
346 if (!*size)
347 return -EINVAL;
348
349 len = *size;
350 if (len > TMPBUFLEN - 1)
351 len = TMPBUFLEN - 1;
352
353 memcpy(tmp, *buf, len);
354
355 tmp[len] = 0;
356 p = tmp;
357 if (*p == '-' && *size > 1) {
358 *neg = true;
359 p++;
360 } else
361 *neg = false;
362 if (!isdigit(*p))
363 return -EINVAL;
364
7f2923c4
CB
365 if (strtoul_lenient(p, &p, 0, val))
366 return -EINVAL;
00b7c339
AW
367
368 len = p - tmp;
369
370 /* We don't know if the next char is whitespace thus we may accept
371 * invalid integers (e.g. 1234...a) or two integers instead of one
372 * (e.g. 123...1). So lets not allow such large numbers. */
373 if (len == TMPBUFLEN - 1)
374 return -EINVAL;
375
376 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
377 return -EINVAL;
1da177e4 378
00b7c339
AW
379 if (tr && (len < *size))
380 *tr = *p;
381
382 *buf += len;
383 *size -= len;
384
385 return 0;
386}
387
388/**
0fc377bd 389 * proc_put_long - converts an integer to a decimal ASCII formatted string
00b7c339 390 *
0fc377bd
RD
391 * @buf: the user buffer
392 * @size: the size of the user buffer
393 * @val: the integer to be converted
394 * @neg: sign of the number, %TRUE for negative
00b7c339 395 *
32927393
CH
396 * In case of success @buf and @size are updated with the amount of bytes
397 * written.
00b7c339 398 */
32927393 399static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
00b7c339
AW
400{
401 int len;
402 char tmp[TMPBUFLEN], *p = tmp;
403
404 sprintf(p, "%s%lu", neg ? "-" : "", val);
405 len = strlen(tmp);
406 if (len > *size)
407 len = *size;
32927393 408 memcpy(*buf, tmp, len);
00b7c339
AW
409 *size -= len;
410 *buf += len;
00b7c339
AW
411}
412#undef TMPBUFLEN
413
/*
 * Store the single character @c at *@buf when at least one byte of room
 * remains, then advance *@buf and shrink *@size by one.  Does nothing if
 * *@size is zero.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;
	*buf = dst + 1;
	(*size)--;
}
1da177e4 425
a2071573
JH
/*
 * Conversion callback for boolean entries: @valp is accessed as a bool.
 * On write the parsed magnitude *@lvalp is stored as a bool (the sign in
 * @negp is not consulted); on read the bool is reported as a non-negative
 * number.  Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (write) {
		*flag = *lvalp;
		return 0;
	}

	*lvalp = (unsigned long)(int)*flag;
	*negp = false;
	return 0;
}
440
/*
 * Default int conversion callback.
 *
 * Write: combine the sign *@negp and magnitude *@lvalp into *@valp,
 * returning -EINVAL when the result would not fit an int.
 * Read: split *@valp into a sign (*@negp) and a magnitude (*@lvalp).
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int cur = *valp;

		*negp = cur < 0;
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	/* A negative value may have a magnitude one larger than INT_MAX. */
	if (*lvalp > (unsigned long) INT_MAX + (*negp ? 1 : 0))
		return -EINVAL;

	*valp = *negp ? -*lvalp : *lvalp;
	return 0;
}
467
4f2fec00
LR
/*
 * Default unsigned int conversion callback.
 *
 * Write: store *@lvalp in *@valp, or return -EINVAL if it exceeds
 * UINT_MAX.  Read: widen *@valp into *@lvalp.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
482
00b7c339
AW
483static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
484
d8217f07 485static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
32927393 486 int write, void *buffer,
fcfbd547 487 size_t *lenp, loff_t *ppos,
00b7c339 488 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
1da177e4
LT
489 int write, void *data),
490 void *data)
491{
00b7c339 492 int *i, vleft, first = 1, err = 0;
00b7c339 493 size_t left;
32927393 494 char *p;
1da177e4 495
00b7c339 496 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
1da177e4
LT
497 *lenp = 0;
498 return 0;
499 }
500
fcfbd547 501 i = (int *) tbl_data;
1da177e4
LT
502 vleft = table->maxlen / sizeof(*i);
503 left = *lenp;
504
505 if (!conv)
506 conv = do_proc_dointvec_conv;
507
00b7c339 508 if (write) {
d383d484
LR
509 if (proc_first_pos_non_zero_ignore(ppos, table))
510 goto out;
f4aacea2 511
00b7c339
AW
512 if (left > PAGE_SIZE - 1)
513 left = PAGE_SIZE - 1;
32927393 514 p = buffer;
00b7c339
AW
515 }
516
1da177e4 517 for (; left && vleft--; i++, first=0) {
00b7c339
AW
518 unsigned long lval;
519 bool neg;
1da177e4 520
00b7c339 521 if (write) {
70f6cbb6 522 left -= proc_skip_spaces(&p);
1da177e4 523
563b0467
O
524 if (!left)
525 break;
70f6cbb6 526 err = proc_get_long(&p, &left, &lval, &neg,
00b7c339
AW
527 proc_wspace_sep,
528 sizeof(proc_wspace_sep), NULL);
529 if (err)
1da177e4 530 break;
00b7c339
AW
531 if (conv(&neg, &lval, i, 1, data)) {
532 err = -EINVAL;
1da177e4 533 break;
00b7c339 534 }
1da177e4 535 } else {
00b7c339
AW
536 if (conv(&neg, &lval, i, 0, data)) {
537 err = -EINVAL;
538 break;
539 }
1da177e4 540 if (!first)
32927393
CH
541 proc_put_char(&buffer, &left, '\t');
542 proc_put_long(&buffer, &left, lval, neg);
1da177e4
LT
543 }
544 }
545
00b7c339 546 if (!write && !first && left && !err)
32927393 547 proc_put_char(&buffer, &left, '\n');
563b0467 548 if (write && !err && left)
70f6cbb6 549 left -= proc_skip_spaces(&p);
32927393
CH
550 if (write && first)
551 return err ? : -EINVAL;
1da177e4 552 *lenp -= left;
f4aacea2 553out:
1da177e4 554 *ppos += *lenp;
00b7c339 555 return err;
1da177e4
LT
556}
557
8d65af78 558static int do_proc_dointvec(struct ctl_table *table, int write,
32927393 559 void *buffer, size_t *lenp, loff_t *ppos,
00b7c339 560 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
fcfbd547
KK
561 int write, void *data),
562 void *data)
563{
8d65af78 564 return __do_proc_dointvec(table->data, table, write,
fcfbd547
KK
565 buffer, lenp, ppos, conv, data);
566}
567
4f2fec00
LR
568static int do_proc_douintvec_w(unsigned int *tbl_data,
569 struct ctl_table *table,
32927393 570 void *buffer,
4f2fec00
LR
571 size_t *lenp, loff_t *ppos,
572 int (*conv)(unsigned long *lvalp,
573 unsigned int *valp,
574 int write, void *data),
575 void *data)
576{
577 unsigned long lval;
578 int err = 0;
579 size_t left;
580 bool neg;
32927393 581 char *p = buffer;
4f2fec00
LR
582
583 left = *lenp;
584
585 if (proc_first_pos_non_zero_ignore(ppos, table))
586 goto bail_early;
587
588 if (left > PAGE_SIZE - 1)
589 left = PAGE_SIZE - 1;
590
4f2fec00
LR
591 left -= proc_skip_spaces(&p);
592 if (!left) {
593 err = -EINVAL;
594 goto out_free;
595 }
596
597 err = proc_get_long(&p, &left, &lval, &neg,
598 proc_wspace_sep,
599 sizeof(proc_wspace_sep), NULL);
600 if (err || neg) {
601 err = -EINVAL;
602 goto out_free;
603 }
604
605 if (conv(&lval, tbl_data, 1, data)) {
606 err = -EINVAL;
607 goto out_free;
608 }
609
610 if (!err && left)
611 left -= proc_skip_spaces(&p);
612
613out_free:
4f2fec00
LR
614 if (err)
615 return -EINVAL;
616
617 return 0;
618
619 /* This is in keeping with old __do_proc_dointvec() */
620bail_early:
621 *ppos += *lenp;
622 return err;
623}
624
32927393 625static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
4f2fec00
LR
626 size_t *lenp, loff_t *ppos,
627 int (*conv)(unsigned long *lvalp,
628 unsigned int *valp,
629 int write, void *data),
630 void *data)
631{
632 unsigned long lval;
633 int err = 0;
634 size_t left;
635
636 left = *lenp;
637
638 if (conv(&lval, tbl_data, 0, data)) {
639 err = -EINVAL;
640 goto out;
641 }
642
32927393
CH
643 proc_put_long(&buffer, &left, lval, false);
644 if (!left)
4f2fec00
LR
645 goto out;
646
32927393 647 proc_put_char(&buffer, &left, '\n');
4f2fec00
LR
648
649out:
650 *lenp -= left;
651 *ppos += *lenp;
652
653 return err;
654}
655
656static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
32927393 657 int write, void *buffer,
4f2fec00
LR
658 size_t *lenp, loff_t *ppos,
659 int (*conv)(unsigned long *lvalp,
660 unsigned int *valp,
661 int write, void *data),
662 void *data)
663{
664 unsigned int *i, vleft;
665
666 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
667 *lenp = 0;
668 return 0;
669 }
670
671 i = (unsigned int *) tbl_data;
672 vleft = table->maxlen / sizeof(*i);
673
674 /*
675 * Arrays are not supported, keep this simple. *Do not* add
676 * support for them.
677 */
678 if (vleft != 1) {
679 *lenp = 0;
680 return -EINVAL;
681 }
682
683 if (!conv)
684 conv = do_proc_douintvec_conv;
685
686 if (write)
687 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
688 conv, data);
689 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
690}
691
1998f193
LC
692int do_proc_douintvec(struct ctl_table *table, int write,
693 void *buffer, size_t *lenp, loff_t *ppos,
694 int (*conv)(unsigned long *lvalp,
695 unsigned int *valp,
696 int write, void *data),
697 void *data)
4f2fec00
LR
698{
699 return __do_proc_douintvec(table->data, table, write,
700 buffer, lenp, ppos, conv, data);
701}
702
a2071573
JH
703/**
704 * proc_dobool - read/write a bool
705 * @table: the sysctl table
706 * @write: %TRUE if this is a write to the sysctl file
707 * @buffer: the user buffer
708 * @lenp: the size of the user buffer
709 * @ppos: file position
710 *
711 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
712 * values from/to the user buffer, treated as an ASCII string.
713 *
714 * Returns 0 on success.
715 */
716int proc_dobool(struct ctl_table *table, int write, void *buffer,
717 size_t *lenp, loff_t *ppos)
718{
719 return do_proc_dointvec(table, write, buffer, lenp, ppos,
720 do_proc_dobool_conv, NULL);
721}
722
1da177e4
LT
723/**
724 * proc_dointvec - read a vector of integers
725 * @table: the sysctl table
726 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
727 * @buffer: the user buffer
728 * @lenp: the size of the user buffer
729 * @ppos: file position
730 *
731 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
732 * values from/to the user buffer, treated as an ASCII string.
733 *
734 * Returns 0 on success.
735 */
32927393
CH
736int proc_dointvec(struct ctl_table *table, int write, void *buffer,
737 size_t *lenp, loff_t *ppos)
1da177e4 738{
e7d316a0
SAK
739 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
740}
741
6923aa0d
SAS
742#ifdef CONFIG_COMPACTION
743static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
32927393 744 int write, void *buffer, size_t *lenp, loff_t *ppos)
6923aa0d
SAS
745{
746 int ret, old;
747
748 if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
749 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
750
751 old = *(int *)table->data;
752 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
753 if (ret)
754 return ret;
755 if (old != *(int *)table->data)
756 pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
757 table->procname, current->comm,
758 task_pid_nr(current));
759 return ret;
760}
761#endif
762
e7d316a0
SAK
763/**
764 * proc_douintvec - read a vector of unsigned integers
765 * @table: the sysctl table
766 * @write: %TRUE if this is a write to the sysctl file
767 * @buffer: the user buffer
768 * @lenp: the size of the user buffer
769 * @ppos: file position
770 *
771 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
772 * values from/to the user buffer, treated as an ASCII string.
773 *
774 * Returns 0 on success.
775 */
32927393
CH
776int proc_douintvec(struct ctl_table *table, int write, void *buffer,
777 size_t *lenp, loff_t *ppos)
e7d316a0 778{
4f2fec00
LR
779 return do_proc_douintvec(table, write, buffer, lenp, ppos,
780 do_proc_douintvec_conv, NULL);
1da177e4
LT
781}
782
34f5a398 783/*
25ddbb18
AK
784 * Taint values can only be increased
785 * This means we can safely use a temporary.
34f5a398 786 */
8d65af78 787static int proc_taint(struct ctl_table *table, int write,
32927393 788 void *buffer, size_t *lenp, loff_t *ppos)
34f5a398 789{
25ddbb18
AK
790 struct ctl_table t;
791 unsigned long tmptaint = get_taint();
792 int err;
34f5a398 793
91fcd412 794 if (write && !capable(CAP_SYS_ADMIN))
34f5a398
TT
795 return -EPERM;
796
25ddbb18
AK
797 t = *table;
798 t.data = &tmptaint;
8d65af78 799 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
25ddbb18
AK
800 if (err < 0)
801 return err;
802
803 if (write) {
db38d5c1
RA
804 int i;
805
806 /*
807 * If we are relying on panic_on_taint not producing
808 * false positives due to userspace input, bail out
809 * before setting the requested taint flags.
810 */
811 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
812 return -EINVAL;
813
25ddbb18
AK
814 /*
815 * Poor man's atomic or. Not worth adding a primitive
816 * to everyone's atomic.h for this
817 */
e77132e7
RA
818 for (i = 0; i < TAINT_FLAGS_COUNT; i++)
819 if ((1UL << i) & tmptaint)
373d4d09 820 add_taint(i, LOCKDEP_STILL_OK);
25ddbb18
AK
821 }
822
823 return err;
34f5a398
TT
824}
825
24704f36
WL
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the optional lower and upper bound used when range checking
 * values written through the proc_dointvec_minmax() handler.  A NULL
 * pointer means that side of the range is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
839
00b7c339
AW
840static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
841 int *valp,
1da177e4
LT
842 int write, void *data)
843{
2bc4fc60 844 int tmp, ret;
1da177e4 845 struct do_proc_dointvec_minmax_conv_param *param = data;
2bc4fc60
ZW
846 /*
847 * If writing, first do so via a temporary local int so we can
848 * bounds-check it before touching *valp.
849 */
850 int *ip = write ? &tmp : valp;
851
852 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
853 if (ret)
854 return ret;
855
1da177e4 856 if (write) {
2bc4fc60
ZW
857 if ((param->min && *param->min > tmp) ||
858 (param->max && *param->max < tmp))
1da177e4 859 return -EINVAL;
2bc4fc60 860 *valp = tmp;
1da177e4 861 }
2bc4fc60 862
1da177e4
LT
863 return 0;
864}
865
866/**
867 * proc_dointvec_minmax - read a vector of integers with min/max values
868 * @table: the sysctl table
869 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
870 * @buffer: the user buffer
871 * @lenp: the size of the user buffer
872 * @ppos: file position
873 *
874 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
875 * values from/to the user buffer, treated as an ASCII string.
876 *
877 * This routine will ensure the values are within the range specified by
878 * table->extra1 (min) and table->extra2 (max).
879 *
24704f36 880 * Returns 0 on success or -EINVAL on write when the range check fails.
1da177e4 881 */
8d65af78 882int proc_dointvec_minmax(struct ctl_table *table, int write,
32927393 883 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
884{
885 struct do_proc_dointvec_minmax_conv_param param = {
886 .min = (int *) table->extra1,
887 .max = (int *) table->extra2,
888 };
8d65af78 889 return do_proc_dointvec(table, write, buffer, lenp, ppos,
1da177e4
LT
890 do_proc_dointvec_minmax_conv, &param);
891}
892
24704f36
WL
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the optional lower and upper bound used when range checking
 * values written through the proc_douintvec_minmax() handler.  A NULL
 * pointer means that side of the range is not checked.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
906
907static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
908 unsigned int *valp,
909 int write, void *data)
910{
2bc4fc60
ZW
911 int ret;
912 unsigned int tmp;
61d9b56a 913 struct do_proc_douintvec_minmax_conv_param *param = data;
2bc4fc60
ZW
914 /* write via temporary local uint for bounds-checking */
915 unsigned int *up = write ? &tmp : valp;
61d9b56a 916
2bc4fc60
ZW
917 ret = do_proc_douintvec_conv(lvalp, up, write, data);
918 if (ret)
919 return ret;
fb910c42 920
2bc4fc60
ZW
921 if (write) {
922 if ((param->min && *param->min > tmp) ||
923 (param->max && *param->max < tmp))
61d9b56a
LR
924 return -ERANGE;
925
2bc4fc60 926 *valp = tmp;
61d9b56a
LR
927 }
928
929 return 0;
930}
931
932/**
933 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
934 * @table: the sysctl table
935 * @write: %TRUE if this is a write to the sysctl file
936 * @buffer: the user buffer
937 * @lenp: the size of the user buffer
938 * @ppos: file position
939 *
940 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
941 * values from/to the user buffer, treated as an ASCII string. Negative
942 * strings are not allowed.
943 *
944 * This routine will ensure the values are within the range specified by
945 * table->extra1 (min) and table->extra2 (max). There is a final sanity
946 * check for UINT_MAX to avoid having to support wrap around uses from
947 * userspace.
948 *
24704f36 949 * Returns 0 on success or -ERANGE on write when the range check fails.
61d9b56a
LR
950 */
951int proc_douintvec_minmax(struct ctl_table *table, int write,
32927393 952 void *buffer, size_t *lenp, loff_t *ppos)
61d9b56a
LR
953{
954 struct do_proc_douintvec_minmax_conv_param param = {
955 .min = (unsigned int *) table->extra1,
956 .max = (unsigned int *) table->extra2,
957 };
958 return do_proc_douintvec(table, write, buffer, lenp, ppos,
959 do_proc_douintvec_minmax_conv, &param);
960}
961
cb944413
ED
962/**
963 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
964 * @table: the sysctl table
965 * @write: %TRUE if this is a write to the sysctl file
966 * @buffer: the user buffer
967 * @lenp: the size of the user buffer
968 * @ppos: file position
969 *
970 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
971 * values from/to the user buffer, treated as an ASCII string. Negative
972 * strings are not allowed.
973 *
974 * This routine will ensure the values are within the range specified by
975 * table->extra1 (min) and table->extra2 (max).
976 *
977 * Returns 0 on success or an error on write when the range check fails.
978 */
979int proc_dou8vec_minmax(struct ctl_table *table, int write,
980 void *buffer, size_t *lenp, loff_t *ppos)
981{
982 struct ctl_table tmp;
983 unsigned int min = 0, max = 255U, val;
984 u8 *data = table->data;
985 struct do_proc_douintvec_minmax_conv_param param = {
986 .min = &min,
987 .max = &max,
988 };
989 int res;
990
991 /* Do not support arrays yet. */
992 if (table->maxlen != sizeof(u8))
993 return -EINVAL;
994
995 if (table->extra1) {
996 min = *(unsigned int *) table->extra1;
997 if (min > 255U)
998 return -EINVAL;
999 }
1000 if (table->extra2) {
1001 max = *(unsigned int *) table->extra2;
1002 if (max > 255U)
1003 return -EINVAL;
1004 }
1005
1006 tmp = *table;
1007
1008 tmp.maxlen = sizeof(val);
1009 tmp.data = &val;
1010 val = *data;
1011 res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1012 do_proc_douintvec_minmax_conv, &param);
1013 if (res)
1014 return res;
1015 if (write)
1016 *data = val;
1017 return 0;
1018}
1019EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1020
eaee4172
DS
1021#ifdef CONFIG_MAGIC_SYSRQ
1022static int sysrq_sysctl_handler(struct ctl_table *table, int write,
32927393 1023 void *buffer, size_t *lenp, loff_t *ppos)
eaee4172
DS
1024{
1025 int tmp, ret;
1026
1027 tmp = sysrq_mask();
1028
1029 ret = __do_proc_dointvec(&tmp, table, write, buffer,
1030 lenp, ppos, NULL, NULL);
1031 if (ret || !write)
1032 return ret;
1033
1034 if (write)
1035 sysrq_toggle_support(tmp);
1036
1037 return 0;
1038}
1039#endif
1040
32927393
CH
1041static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1042 int write, void *buffer, size_t *lenp, loff_t *ppos,
1043 unsigned long convmul, unsigned long convdiv)
1da177e4 1044{
00b7c339
AW
1045 unsigned long *i, *min, *max;
1046 int vleft, first = 1, err = 0;
00b7c339 1047 size_t left;
32927393 1048 char *p;
00b7c339
AW
1049
1050 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1da177e4
LT
1051 *lenp = 0;
1052 return 0;
1053 }
00b7c339 1054
fcfbd547 1055 i = (unsigned long *) data;
1da177e4
LT
1056 min = (unsigned long *) table->extra1;
1057 max = (unsigned long *) table->extra2;
1058 vleft = table->maxlen / sizeof(unsigned long);
1059 left = *lenp;
00b7c339
AW
1060
1061 if (write) {
d383d484
LR
1062 if (proc_first_pos_non_zero_ignore(ppos, table))
1063 goto out;
f4aacea2 1064
00b7c339
AW
1065 if (left > PAGE_SIZE - 1)
1066 left = PAGE_SIZE - 1;
32927393 1067 p = buffer;
00b7c339
AW
1068 }
1069
27b3d80a 1070 for (; left && vleft--; i++, first = 0) {
00b7c339
AW
1071 unsigned long val;
1072
1da177e4 1073 if (write) {
00b7c339
AW
1074 bool neg;
1075
70f6cbb6 1076 left -= proc_skip_spaces(&p);
09be1784
CL
1077 if (!left)
1078 break;
00b7c339 1079
70f6cbb6 1080 err = proc_get_long(&p, &left, &val, &neg,
00b7c339
AW
1081 proc_wspace_sep,
1082 sizeof(proc_wspace_sep), NULL);
1622ed7d
BL
1083 if (err || neg) {
1084 err = -EINVAL;
1da177e4 1085 break;
1622ed7d
BL
1086 }
1087
ff9f8a7c 1088 val = convmul * val / convdiv;
e260ad01
CB
1089 if ((min && val < *min) || (max && val > *max)) {
1090 err = -EINVAL;
1091 break;
1092 }
1da177e4
LT
1093 *i = val;
1094 } else {
00b7c339 1095 val = convdiv * (*i) / convmul;
32927393
CH
1096 if (!first)
1097 proc_put_char(&buffer, &left, '\t');
1098 proc_put_long(&buffer, &left, val, false);
1da177e4
LT
1099 }
1100 }
1101
00b7c339 1102 if (!write && !first && left && !err)
32927393 1103 proc_put_char(&buffer, &left, '\n');
00b7c339 1104 if (write && !err)
70f6cbb6 1105 left -= proc_skip_spaces(&p);
32927393
CH
1106 if (write && first)
1107 return err ? : -EINVAL;
1da177e4 1108 *lenp -= left;
f4aacea2 1109out:
1da177e4 1110 *ppos += *lenp;
00b7c339 1111 return err;
1da177e4
LT
1112}
1113
d8217f07 1114static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
32927393
CH
1115 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1116 unsigned long convdiv)
fcfbd547
KK
1117{
1118 return __do_proc_doulongvec_minmax(table->data, table, write,
8d65af78 1119 buffer, lenp, ppos, convmul, convdiv);
fcfbd547
KK
1120}
1121
1da177e4
LT
1122/**
1123 * proc_doulongvec_minmax - read a vector of long integers with min/max values
1124 * @table: the sysctl table
1125 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1126 * @buffer: the user buffer
1127 * @lenp: the size of the user buffer
1128 * @ppos: file position
1129 *
1130 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1131 * values from/to the user buffer, treated as an ASCII string.
1132 *
1133 * This routine will ensure the values are within the range specified by
1134 * table->extra1 (min) and table->extra2 (max).
1135 *
1136 * Returns 0 on success.
1137 */
8d65af78 1138int proc_doulongvec_minmax(struct ctl_table *table, int write,
32927393 1139 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1140{
8d65af78 1141 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1da177e4
LT
1142}
1143
1144/**
1145 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1146 * @table: the sysctl table
1147 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1148 * @buffer: the user buffer
1149 * @lenp: the size of the user buffer
1150 * @ppos: file position
1151 *
1152 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1153 * values from/to the user buffer, treated as an ASCII string. The values
1154 * are treated as milliseconds, and converted to jiffies when they are stored.
1155 *
1156 * This routine will ensure the values are within the range specified by
1157 * table->extra1 (min) and table->extra2 (max).
1158 *
1159 * Returns 0 on success.
1160 */
d8217f07 1161int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
32927393 1162 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1163{
8d65af78 1164 return do_proc_doulongvec_minmax(table, write, buffer,
1da177e4
LT
1165 lenp, ppos, HZ, 1000l);
1166}
1167
1168
00b7c339 1169static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
1170 int *valp,
1171 int write, void *data)
1172{
1173 if (write) {
63259457 1174 if (*lvalp > INT_MAX / HZ)
cba9f33d 1175 return 1;
1da177e4
LT
1176 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
1177 } else {
1178 int val = *valp;
1179 unsigned long lval;
1180 if (val < 0) {
00b7c339 1181 *negp = true;
9a5bc726 1182 lval = -(unsigned long)val;
1da177e4 1183 } else {
00b7c339 1184 *negp = false;
1da177e4
LT
1185 lval = (unsigned long)val;
1186 }
1187 *lvalp = lval / HZ;
1188 }
1189 return 0;
1190}
1191
00b7c339 1192static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
1193 int *valp,
1194 int write, void *data)
1195{
1196 if (write) {
cba9f33d
BS
1197 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1198 return 1;
1da177e4
LT
1199 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1200 } else {
1201 int val = *valp;
1202 unsigned long lval;
1203 if (val < 0) {
00b7c339 1204 *negp = true;
9a5bc726 1205 lval = -(unsigned long)val;
1da177e4 1206 } else {
00b7c339 1207 *negp = false;
1da177e4
LT
1208 lval = (unsigned long)val;
1209 }
1210 *lvalp = jiffies_to_clock_t(lval);
1211 }
1212 return 0;
1213}
1214
/*
 * Conversion callback between a sign/magnitude pair in milliseconds
 * (*negp, *lvalp) and a stored int in jiffies (*valp).
 *
 * Write: converts the signed input with msecs_to_jiffies() and returns 1
 * when the result does not fit in an int (> INT_MAX).
 * Read: splits *valp into sign and magnitude and reports
 * jiffies_to_msecs() of the magnitude.
 *
 * @data is unused.  Returns 0 unless the write-side range check fails.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long mag;
	bool neg;
	int cur;

	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
		return 0;
	}

	cur = *valp;
	neg = cur < 0;
	mag = neg ? -(unsigned long)cur : (unsigned long)cur;

	*negp = neg;
	*lvalp = jiffies_to_msecs(mag);
	return 0;
}
1239
1240/**
1241 * proc_dointvec_jiffies - read a vector of integers as seconds
1242 * @table: the sysctl table
1243 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1244 * @buffer: the user buffer
1245 * @lenp: the size of the user buffer
1246 * @ppos: file position
1247 *
1248 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1249 * values from/to the user buffer, treated as an ASCII string.
1250 * The values read are assumed to be in seconds, and are converted into
1251 * jiffies.
1252 *
1253 * Returns 0 on success.
1254 */
8d65af78 1255int proc_dointvec_jiffies(struct ctl_table *table, int write,
32927393 1256 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1257{
8d65af78 1258 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
1259 do_proc_dointvec_jiffies_conv,NULL);
1260}
1261
1262/**
1263 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1264 * @table: the sysctl table
1265 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1266 * @buffer: the user buffer
1267 * @lenp: the size of the user buffer
1e5d5331 1268 * @ppos: pointer to the file position
1da177e4
LT
1269 *
1270 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1271 * values from/to the user buffer, treated as an ASCII string.
1272 * The values read are assumed to be in 1/USER_HZ seconds, and
1273 * are converted into jiffies.
1274 *
1275 * Returns 0 on success.
1276 */
8d65af78 1277int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
32927393 1278 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1279{
8d65af78 1280 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
1281 do_proc_dointvec_userhz_jiffies_conv,NULL);
1282}
1283
1284/**
1285 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1286 * @table: the sysctl table
1287 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
1288 * @buffer: the user buffer
1289 * @lenp: the size of the user buffer
67be2dd1
MW
1290 * @ppos: file position
1291 * @ppos: the current position in the file
1da177e4
LT
1292 *
1293 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1294 * values from/to the user buffer, treated as an ASCII string.
1295 * The values read are assumed to be in 1/1000 seconds, and
1296 * are converted into jiffies.
1297 *
1298 * Returns 0 on success.
1299 */
32927393
CH
1300int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1301 size_t *lenp, loff_t *ppos)
1da177e4 1302{
8d65af78 1303 return do_proc_dointvec(table, write, buffer, lenp, ppos,
1da177e4
LT
1304 do_proc_dointvec_ms_jiffies_conv, NULL);
1305}
1306
32927393
CH
1307static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1308 size_t *lenp, loff_t *ppos)
9ec52099
CLG
1309{
1310 struct pid *new_pid;
1311 pid_t tmp;
1312 int r;
1313
6c5f3e7b 1314 tmp = pid_vnr(cad_pid);
9ec52099 1315
8d65af78 1316 r = __do_proc_dointvec(&tmp, table, write, buffer,
9ec52099
CLG
1317 lenp, ppos, NULL, NULL);
1318 if (r || !write)
1319 return r;
1320
1321 new_pid = find_get_pid(tmp);
1322 if (!new_pid)
1323 return -ESRCH;
1324
1325 put_pid(xchg(&cad_pid, new_pid));
1326 return 0;
1327}
1328
9f977fb7
OP
1329/**
1330 * proc_do_large_bitmap - read/write from/to a large bitmap
1331 * @table: the sysctl table
1332 * @write: %TRUE if this is a write to the sysctl file
1333 * @buffer: the user buffer
1334 * @lenp: the size of the user buffer
1335 * @ppos: file position
1336 *
1337 * The bitmap is stored at table->data and the bitmap length (in bits)
1338 * in table->maxlen.
1339 *
1340 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1341 * large bitmaps may be represented in a compact manner. Writing into
1342 * the file will clear the bitmap then update it with the given input.
1343 *
1344 * Returns 0 on success.
1345 */
1346int proc_do_large_bitmap(struct ctl_table *table, int write,
32927393 1347 void *buffer, size_t *lenp, loff_t *ppos)
9f977fb7
OP
1348{
1349 int err = 0;
9f977fb7
OP
1350 size_t left = *lenp;
1351 unsigned long bitmap_len = table->maxlen;
122ff243 1352 unsigned long *bitmap = *(unsigned long **) table->data;
9f977fb7
OP
1353 unsigned long *tmp_bitmap = NULL;
1354 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1355
122ff243 1356 if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
9f977fb7
OP
1357 *lenp = 0;
1358 return 0;
1359 }
1360
1361 if (write) {
32927393 1362 char *p = buffer;
3116ad38 1363 size_t skipped = 0;
9f977fb7 1364
3116ad38 1365 if (left > PAGE_SIZE - 1) {
9f977fb7 1366 left = PAGE_SIZE - 1;
3116ad38
ES
1367 /* How much of the buffer we'll skip this pass */
1368 skipped = *lenp - left;
1369 }
9f977fb7 1370
475dae38 1371 tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
32927393 1372 if (!tmp_bitmap)
9f977fb7 1373 return -ENOMEM;
70f6cbb6 1374 proc_skip_char(&p, &left, '\n');
9f977fb7
OP
1375 while (!err && left) {
1376 unsigned long val_a, val_b;
1377 bool neg;
3116ad38 1378 size_t saved_left;
9f977fb7 1379
3116ad38
ES
1380 /* In case we stop parsing mid-number, we can reset */
1381 saved_left = left;
70f6cbb6 1382 err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
9f977fb7 1383 sizeof(tr_a), &c);
3116ad38
ES
1384 /*
1385 * If we consumed the entirety of a truncated buffer or
1386 * only one char is left (may be a "-"), then stop here,
1387 * reset, & come back for more.
1388 */
1389 if ((left <= 1) && skipped) {
1390 left = saved_left;
1391 break;
1392 }
1393
9f977fb7
OP
1394 if (err)
1395 break;
1396 if (val_a >= bitmap_len || neg) {
1397 err = -EINVAL;
1398 break;
1399 }
1400
1401 val_b = val_a;
1402 if (left) {
70f6cbb6 1403 p++;
9f977fb7
OP
1404 left--;
1405 }
1406
1407 if (c == '-') {
70f6cbb6 1408 err = proc_get_long(&p, &left, &val_b,
9f977fb7
OP
1409 &neg, tr_b, sizeof(tr_b),
1410 &c);
3116ad38
ES
1411 /*
1412 * If we consumed all of a truncated buffer or
1413 * then stop here, reset, & come back for more.
1414 */
1415 if (!left && skipped) {
1416 left = saved_left;
1417 break;
1418 }
1419
9f977fb7
OP
1420 if (err)
1421 break;
1422 if (val_b >= bitmap_len || neg ||
1423 val_a > val_b) {
1424 err = -EINVAL;
1425 break;
1426 }
1427 if (left) {
70f6cbb6 1428 p++;
9f977fb7
OP
1429 left--;
1430 }
1431 }
1432
5a04cca6 1433 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
70f6cbb6 1434 proc_skip_char(&p, &left, '\n');
9f977fb7 1435 }
3116ad38 1436 left += skipped;
9f977fb7
OP
1437 } else {
1438 unsigned long bit_a, bit_b = 0;
9a52c5f3 1439 bool first = 1;
9f977fb7
OP
1440
1441 while (left) {
1442 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1443 if (bit_a >= bitmap_len)
1444 break;
1445 bit_b = find_next_zero_bit(bitmap, bitmap_len,
1446 bit_a + 1) - 1;
1447
32927393
CH
1448 if (!first)
1449 proc_put_char(&buffer, &left, ',');
1450 proc_put_long(&buffer, &left, bit_a, false);
9f977fb7 1451 if (bit_a != bit_b) {
32927393
CH
1452 proc_put_char(&buffer, &left, '-');
1453 proc_put_long(&buffer, &left, bit_b, false);
9f977fb7
OP
1454 }
1455
1456 first = 0; bit_b++;
1457 }
32927393 1458 proc_put_char(&buffer, &left, '\n');
9f977fb7
OP
1459 }
1460
1461 if (!err) {
1462 if (write) {
1463 if (*ppos)
1464 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1465 else
5a04cca6 1466 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
9f977fb7 1467 }
9f977fb7
OP
1468 *lenp -= left;
1469 *ppos += *lenp;
9f977fb7 1470 }
f9eb2fdd 1471
475dae38 1472 bitmap_free(tmp_bitmap);
f9eb2fdd 1473 return err;
9f977fb7
OP
1474}
1475
55610500 1476#else /* CONFIG_PROC_SYSCTL */
1da177e4 1477
8d65af78 1478int proc_dostring(struct ctl_table *table, int write,
32927393 1479 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1480{
1481 return -ENOSYS;
1482}
1483
a2071573
JH
1484int proc_dobool(struct ctl_table *table, int write,
1485 void *buffer, size_t *lenp, loff_t *ppos)
1486{
1487 return -ENOSYS;
1488}
1489
f461d2dc 1490int proc_dointvec(struct ctl_table *table, int write,
32927393 1491 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1492{
1493 return -ENOSYS;
1494}
1495
1496int proc_douintvec(struct ctl_table *table, int write,
32927393 1497 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1498{
1499 return -ENOSYS;
1500}
1501
1502int proc_dointvec_minmax(struct ctl_table *table, int write,
32927393 1503 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1504{
1505 return -ENOSYS;
1506}
1507
1508int proc_douintvec_minmax(struct ctl_table *table, int write,
32927393 1509 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1510{
1511 return -ENOSYS;
9f977fb7
OP
1512}
1513
cb944413
ED
1514int proc_dou8vec_minmax(struct ctl_table *table, int write,
1515 void *buffer, size_t *lenp, loff_t *ppos)
1516{
1517 return -ENOSYS;
1518}
1519
f461d2dc 1520int proc_dointvec_jiffies(struct ctl_table *table, int write,
32927393 1521 void *buffer, size_t *lenp, loff_t *ppos)
f461d2dc
CH
1522{
1523 return -ENOSYS;
1524}
1da177e4 1525
f461d2dc 1526int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
32927393 1527 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1528{
1529 return -ENOSYS;
1530}
1531
f461d2dc 1532int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
32927393 1533 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1534{
1535 return -ENOSYS;
1536}
1537
f461d2dc 1538int proc_doulongvec_minmax(struct ctl_table *table, int write,
32927393 1539 void *buffer, size_t *lenp, loff_t *ppos)
e7d316a0
SAK
1540{
1541 return -ENOSYS;
1542}
1543
f461d2dc 1544int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
32927393 1545 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1546{
32927393 1547 return -ENOSYS;
1da177e4
LT
1548}
1549
f461d2dc 1550int proc_do_large_bitmap(struct ctl_table *table, int write,
32927393 1551 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
1552{
1553 return -ENOSYS;
1554}
1555
f461d2dc
CH
1556#endif /* CONFIG_PROC_SYSCTL */
1557
1558#if defined(CONFIG_SYSCTL)
1559int proc_do_static_key(struct ctl_table *table, int write,
32927393 1560 void *buffer, size_t *lenp, loff_t *ppos)
1da177e4 1561{
f461d2dc
CH
1562 struct static_key *key = (struct static_key *)table->data;
1563 static DEFINE_MUTEX(static_key_mutex);
1564 int val, ret;
1565 struct ctl_table tmp = {
1566 .data = &val,
1567 .maxlen = sizeof(val),
1568 .mode = table->mode,
1569 .extra1 = SYSCTL_ZERO,
1570 .extra2 = SYSCTL_ONE,
1571 };
1572
1573 if (write && !capable(CAP_SYS_ADMIN))
1574 return -EPERM;
1575
1576 mutex_lock(&static_key_mutex);
1577 val = static_key_enabled(key);
1578 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1579 if (write && !ret) {
1580 if (val)
1581 static_key_enable(key);
1582 else
1583 static_key_disable(key);
1584 }
1585 mutex_unlock(&static_key_mutex);
1586 return ret;
1da177e4
LT
1587}
1588
f461d2dc 1589static struct ctl_table kern_table[] = {
b7cc6ec7 1590#ifdef CONFIG_NUMA_BALANCING
f461d2dc
CH
1591 {
1592 .procname = "numa_balancing",
1593 .data = NULL, /* filled in by handler */
1594 .maxlen = sizeof(unsigned int),
1595 .mode = 0644,
1596 .proc_handler = sysctl_numa_balancing,
1597 .extra1 = SYSCTL_ZERO,
c574bbe9 1598 .extra2 = SYSCTL_FOUR,
f461d2dc
CH
1599 },
1600#endif /* CONFIG_NUMA_BALANCING */
f461d2dc
CH
1601 {
1602 .procname = "panic",
1603 .data = &panic_timeout,
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = proc_dointvec,
1607 },
f461d2dc
CH
1608#ifdef CONFIG_PROC_SYSCTL
1609 {
1610 .procname = "tainted",
1611 .maxlen = sizeof(long),
1612 .mode = 0644,
1613 .proc_handler = proc_taint,
1614 },
1615 {
1616 .procname = "sysctl_writes_strict",
1617 .data = &sysctl_writes_strict,
1618 .maxlen = sizeof(int),
1619 .mode = 0644,
1620 .proc_handler = proc_dointvec_minmax,
78e36f3b 1621 .extra1 = SYSCTL_NEG_ONE,
f461d2dc
CH
1622 .extra2 = SYSCTL_ONE,
1623 },
f461d2dc
CH
1624#endif
1625 {
1626 .procname = "print-fatal-signals",
1627 .data = &print_fatal_signals,
1628 .maxlen = sizeof(int),
1629 .mode = 0644,
1630 .proc_handler = proc_dointvec,
1631 },
1632#ifdef CONFIG_SPARC
1633 {
1634 .procname = "reboot-cmd",
1635 .data = reboot_command,
1636 .maxlen = 256,
1637 .mode = 0644,
1638 .proc_handler = proc_dostring,
1639 },
1640 {
1641 .procname = "stop-a",
1642 .data = &stop_a_enabled,
1643 .maxlen = sizeof (int),
1644 .mode = 0644,
1645 .proc_handler = proc_dointvec,
1646 },
1647 {
1648 .procname = "scons-poweroff",
1649 .data = &scons_pwroff,
1650 .maxlen = sizeof (int),
1651 .mode = 0644,
1652 .proc_handler = proc_dointvec,
1653 },
1654#endif
1655#ifdef CONFIG_SPARC64
1656 {
1657 .procname = "tsb-ratio",
1658 .data = &sysctl_tsb_ratio,
1659 .maxlen = sizeof (int),
1660 .mode = 0644,
1661 .proc_handler = proc_dointvec,
1662 },
1663#endif
1664#ifdef CONFIG_PARISC
1665 {
1666 .procname = "soft-power",
1667 .data = &pwrsw_enabled,
1668 .maxlen = sizeof (int),
1669 .mode = 0644,
1670 .proc_handler = proc_dointvec,
1671 },
1672#endif
1673#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1674 {
1675 .procname = "unaligned-trap",
1676 .data = &unaligned_enabled,
1677 .maxlen = sizeof (int),
1678 .mode = 0644,
1679 .proc_handler = proc_dointvec,
1680 },
f461d2dc
CH
1681#endif
1682#ifdef CONFIG_STACK_TRACER
1683 {
1684 .procname = "stack_tracer_enabled",
1685 .data = &stack_tracer_enabled,
1686 .maxlen = sizeof(int),
1687 .mode = 0644,
1688 .proc_handler = stack_trace_sysctl,
1689 },
1690#endif
1691#ifdef CONFIG_TRACING
1692 {
1693 .procname = "ftrace_dump_on_oops",
1694 .data = &ftrace_dump_on_oops,
1695 .maxlen = sizeof(int),
1696 .mode = 0644,
1697 .proc_handler = proc_dointvec,
1698 },
1699 {
1700 .procname = "traceoff_on_warning",
1701 .data = &__disable_trace_on_warning,
1702 .maxlen = sizeof(__disable_trace_on_warning),
1703 .mode = 0644,
1704 .proc_handler = proc_dointvec,
1705 },
1706 {
1707 .procname = "tracepoint_printk",
1708 .data = &tracepoint_printk,
1709 .maxlen = sizeof(tracepoint_printk),
1710 .mode = 0644,
1711 .proc_handler = tracepoint_printk_sysctl,
1712 },
1713#endif
f461d2dc
CH
1714#ifdef CONFIG_MODULES
1715 {
1716 .procname = "modprobe",
1717 .data = &modprobe_path,
1718 .maxlen = KMOD_PATH_LEN,
1719 .mode = 0644,
1720 .proc_handler = proc_dostring,
1721 },
1722 {
1723 .procname = "modules_disabled",
1724 .data = &modules_disabled,
1725 .maxlen = sizeof(int),
1726 .mode = 0644,
1727 /* only handle a transition from default "0" to "1" */
1728 .proc_handler = proc_dointvec_minmax,
1729 .extra1 = SYSCTL_ONE,
1730 .extra2 = SYSCTL_ONE,
1731 },
1732#endif
1733#ifdef CONFIG_UEVENT_HELPER
1734 {
1735 .procname = "hotplug",
1736 .data = &uevent_helper,
1737 .maxlen = UEVENT_HELPER_PATH_LEN,
1738 .mode = 0644,
1739 .proc_handler = proc_dostring,
1740 },
1741#endif
f461d2dc
CH
1742#ifdef CONFIG_MAGIC_SYSRQ
1743 {
1744 .procname = "sysrq",
1745 .data = NULL,
1746 .maxlen = sizeof (int),
1747 .mode = 0644,
1748 .proc_handler = sysrq_sysctl_handler,
1749 },
1750#endif
1751#ifdef CONFIG_PROC_SYSCTL
1752 {
1753 .procname = "cad_pid",
1754 .data = NULL,
1755 .maxlen = sizeof (int),
1756 .mode = 0600,
1757 .proc_handler = proc_do_cad_pid,
1758 },
1759#endif
1760 {
1761 .procname = "threads-max",
1762 .data = NULL,
1763 .maxlen = sizeof(int),
1764 .mode = 0644,
1765 .proc_handler = sysctl_max_threads,
1766 },
f461d2dc
CH
1767 {
1768 .procname = "usermodehelper",
1769 .mode = 0555,
1770 .child = usermodehelper_table,
1771 },
f461d2dc
CH
1772 {
1773 .procname = "overflowuid",
1774 .data = &overflowuid,
1775 .maxlen = sizeof(int),
1776 .mode = 0644,
1777 .proc_handler = proc_dointvec_minmax,
2452dcb9 1778 .extra1 = SYSCTL_ZERO,
54771613 1779 .extra2 = SYSCTL_MAXOLDUID,
f461d2dc
CH
1780 },
1781 {
1782 .procname = "overflowgid",
1783 .data = &overflowgid,
1784 .maxlen = sizeof(int),
1785 .mode = 0644,
1786 .proc_handler = proc_dointvec_minmax,
2452dcb9 1787 .extra1 = SYSCTL_ZERO,
54771613 1788 .extra2 = SYSCTL_MAXOLDUID,
f461d2dc
CH
1789 },
1790#ifdef CONFIG_S390
1791 {
1792 .procname = "userprocess_debug",
1793 .data = &show_unhandled_signals,
1794 .maxlen = sizeof(int),
1795 .mode = 0644,
1796 .proc_handler = proc_dointvec,
1797 },
1798#endif
1799 {
1800 .procname = "pid_max",
1801 .data = &pid_max,
1802 .maxlen = sizeof (int),
1803 .mode = 0644,
1804 .proc_handler = proc_dointvec_minmax,
1805 .extra1 = &pid_max_min,
1806 .extra2 = &pid_max_max,
1807 },
1808 {
1809 .procname = "panic_on_oops",
1810 .data = &panic_on_oops,
1811 .maxlen = sizeof(int),
1812 .mode = 0644,
1813 .proc_handler = proc_dointvec,
1814 },
1815 {
1816 .procname = "panic_print",
1817 .data = &panic_print,
1818 .maxlen = sizeof(unsigned long),
1819 .mode = 0644,
1820 .proc_handler = proc_doulongvec_minmax,
1821 },
f461d2dc
CH
1822 {
1823 .procname = "ngroups_max",
f628867d 1824 .data = (void *)&ngroups_max,
f461d2dc
CH
1825 .maxlen = sizeof (int),
1826 .mode = 0444,
1827 .proc_handler = proc_dointvec,
1828 },
1829 {
1830 .procname = "cap_last_cap",
1831 .data = (void *)&cap_last_cap,
1832 .maxlen = sizeof(int),
1833 .mode = 0444,
1834 .proc_handler = proc_dointvec,
1835 },
f461d2dc
CH
1836#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1837 {
1838 .procname = "unknown_nmi_panic",
1839 .data = &unknown_nmi_panic,
1840 .maxlen = sizeof (int),
1841 .mode = 0644,
1842 .proc_handler = proc_dointvec,
1843 },
1844#endif
61d9b56a 1845
cb8e59cc
LT
1846#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1847 defined(CONFIG_DEBUG_STACKOVERFLOW)
f461d2dc 1848 {
cb8e59cc
LT
1849 .procname = "panic_on_stackoverflow",
1850 .data = &sysctl_panic_on_stackoverflow,
f461d2dc
CH
1851 .maxlen = sizeof(int),
1852 .mode = 0644,
1853 .proc_handler = proc_dointvec,
1854 },
cb8e59cc
LT
1855#endif
1856#if defined(CONFIG_X86)
f461d2dc 1857 {
cb8e59cc
LT
1858 .procname = "panic_on_unrecovered_nmi",
1859 .data = &panic_on_unrecovered_nmi,
f461d2dc
CH
1860 .maxlen = sizeof(int),
1861 .mode = 0644,
1862 .proc_handler = proc_dointvec,
1863 },
f461d2dc 1864 {
cb8e59cc
LT
1865 .procname = "panic_on_io_nmi",
1866 .data = &panic_on_io_nmi,
f461d2dc
CH
1867 .maxlen = sizeof(int),
1868 .mode = 0644,
1869 .proc_handler = proc_dointvec,
1870 },
f461d2dc
CH
1871 {
1872 .procname = "bootloader_type",
1873 .data = &bootloader_type,
1874 .maxlen = sizeof (int),
1875 .mode = 0444,
1876 .proc_handler = proc_dointvec,
1877 },
1878 {
1879 .procname = "bootloader_version",
1880 .data = &bootloader_version,
1881 .maxlen = sizeof (int),
1882 .mode = 0444,
1883 .proc_handler = proc_dointvec,
1884 },
1885 {
1886 .procname = "io_delay_type",
1887 .data = &io_delay_type,
1888 .maxlen = sizeof(int),
1889 .mode = 0644,
1890 .proc_handler = proc_dointvec,
1891 },
1892#endif
1893#if defined(CONFIG_MMU)
1894 {
1895 .procname = "randomize_va_space",
1896 .data = &randomize_va_space,
1897 .maxlen = sizeof(int),
1898 .mode = 0644,
1899 .proc_handler = proc_dointvec,
1900 },
1901#endif
1902#if defined(CONFIG_S390) && defined(CONFIG_SMP)
1903 {
1904 .procname = "spin_retry",
1905 .data = &spin_retry,
1906 .maxlen = sizeof (int),
1907 .mode = 0644,
1908 .proc_handler = proc_dointvec,
1909 },
1910#endif
1911#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1912 {
1913 .procname = "acpi_video_flags",
1914 .data = &acpi_realmode_flags,
1915 .maxlen = sizeof (unsigned long),
1916 .mode = 0644,
1917 .proc_handler = proc_doulongvec_minmax,
1918 },
1919#endif
1920#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1921 {
1922 .procname = "ignore-unaligned-usertrap",
1923 .data = &no_unaligned_warning,
1924 .maxlen = sizeof (int),
1925 .mode = 0644,
1926 .proc_handler = proc_dointvec,
1927 },
1928#endif
1929#ifdef CONFIG_IA64
1930 {
1931 .procname = "unaligned-dump-stack",
1932 .data = &unaligned_dump_stack,
1933 .maxlen = sizeof (int),
1934 .mode = 0644,
1935 .proc_handler = proc_dointvec,
1936 },
1937#endif
f461d2dc
CH
1938#ifdef CONFIG_RT_MUTEXES
1939 {
1940 .procname = "max_lock_depth",
1941 .data = &max_lock_depth,
1942 .maxlen = sizeof(int),
1943 .mode = 0644,
1944 .proc_handler = proc_dointvec,
1945 },
1946#endif
f461d2dc
CH
1947#ifdef CONFIG_KEYS
1948 {
1949 .procname = "keys",
1950 .mode = 0555,
1951 .child = key_sysctls,
1952 },
1953#endif
1954#ifdef CONFIG_PERF_EVENTS
1955 /*
1956 * User-space scripts rely on the existence of this file
1957 * as a feature check for perf_events being enabled.
1958 *
1959 * So it's an ABI, do not remove!
1960 */
1961 {
1962 .procname = "perf_event_paranoid",
1963 .data = &sysctl_perf_event_paranoid,
1964 .maxlen = sizeof(sysctl_perf_event_paranoid),
1965 .mode = 0644,
1966 .proc_handler = proc_dointvec,
1967 },
1968 {
1969 .procname = "perf_event_mlock_kb",
1970 .data = &sysctl_perf_event_mlock,
1971 .maxlen = sizeof(sysctl_perf_event_mlock),
1972 .mode = 0644,
1973 .proc_handler = proc_dointvec,
1974 },
1975 {
1976 .procname = "perf_event_max_sample_rate",
1977 .data = &sysctl_perf_event_sample_rate,
1978 .maxlen = sizeof(sysctl_perf_event_sample_rate),
1979 .mode = 0644,
1980 .proc_handler = perf_proc_update_handler,
1981 .extra1 = SYSCTL_ONE,
1982 },
1983 {
1984 .procname = "perf_cpu_time_max_percent",
1985 .data = &sysctl_perf_cpu_time_max_percent,
1986 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
1987 .mode = 0644,
1988 .proc_handler = perf_cpu_time_max_percent_handler,
1989 .extra1 = SYSCTL_ZERO,
78e36f3b 1990 .extra2 = SYSCTL_ONE_HUNDRED,
f461d2dc
CH
1991 },
1992 {
1993 .procname = "perf_event_max_stack",
1994 .data = &sysctl_perf_event_max_stack,
1995 .maxlen = sizeof(sysctl_perf_event_max_stack),
1996 .mode = 0644,
1997 .proc_handler = perf_event_max_stack_handler,
1998 .extra1 = SYSCTL_ZERO,
d73840ec 1999 .extra2 = (void *)&six_hundred_forty_kb,
f461d2dc
CH
2000 },
2001 {
2002 .procname = "perf_event_max_contexts_per_stack",
2003 .data = &sysctl_perf_event_max_contexts_per_stack,
2004 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack),
2005 .mode = 0644,
2006 .proc_handler = perf_event_max_stack_handler,
2007 .extra1 = SYSCTL_ZERO,
78e36f3b 2008 .extra2 = SYSCTL_ONE_THOUSAND,
f461d2dc
CH
2009 },
2010#endif
2011 {
2012 .procname = "panic_on_warn",
2013 .data = &panic_on_warn,
2014 .maxlen = sizeof(int),
2015 .mode = 0644,
2016 .proc_handler = proc_dointvec_minmax,
2017 .extra1 = SYSCTL_ZERO,
2018 .extra2 = SYSCTL_ONE,
2019 },
f461d2dc
CH
2020#if defined(CONFIG_TREE_RCU)
2021 {
2022 .procname = "panic_on_rcu_stall",
2023 .data = &sysctl_panic_on_rcu_stall,
2024 .maxlen = sizeof(sysctl_panic_on_rcu_stall),
2025 .mode = 0644,
2026 .proc_handler = proc_dointvec_minmax,
2027 .extra1 = SYSCTL_ZERO,
2028 .extra2 = SYSCTL_ONE,
2029 },
2030#endif
dfe56404 2031#if defined(CONFIG_TREE_RCU)
2032 {
2033 .procname = "max_rcu_stall_to_panic",
2034 .data = &sysctl_max_rcu_stall_to_panic,
2035 .maxlen = sizeof(sysctl_max_rcu_stall_to_panic),
2036 .mode = 0644,
2037 .proc_handler = proc_dointvec_minmax,
2038 .extra1 = SYSCTL_ONE,
2039 .extra2 = SYSCTL_INT_MAX,
2040 },
f461d2dc
CH
2041#endif
2042 { }
2043};
1da177e4 2044
f461d2dc
CH
2045static struct ctl_table vm_table[] = {
2046 {
2047 .procname = "overcommit_memory",
2048 .data = &sysctl_overcommit_memory,
2049 .maxlen = sizeof(sysctl_overcommit_memory),
2050 .mode = 0644,
56f3547b 2051 .proc_handler = overcommit_policy_handler,
f461d2dc 2052 .extra1 = SYSCTL_ZERO,
78e36f3b 2053 .extra2 = SYSCTL_TWO,
f461d2dc 2054 },
f461d2dc
CH
2055 {
2056 .procname = "overcommit_ratio",
2057 .data = &sysctl_overcommit_ratio,
2058 .maxlen = sizeof(sysctl_overcommit_ratio),
2059 .mode = 0644,
2060 .proc_handler = overcommit_ratio_handler,
2061 },
2062 {
2063 .procname = "overcommit_kbytes",
2064 .data = &sysctl_overcommit_kbytes,
2065 .maxlen = sizeof(sysctl_overcommit_kbytes),
2066 .mode = 0644,
2067 .proc_handler = overcommit_kbytes_handler,
2068 },
2069 {
2070 .procname = "page-cluster",
2071 .data = &page_cluster,
2072 .maxlen = sizeof(int),
2073 .mode = 0644,
2074 .proc_handler = proc_dointvec_minmax,
2075 .extra1 = SYSCTL_ZERO,
2076 },
f461d2dc
CH
2077 {
2078 .procname = "dirtytime_expire_seconds",
2079 .data = &dirtytime_expire_interval,
2080 .maxlen = sizeof(dirtytime_expire_interval),
2081 .mode = 0644,
2082 .proc_handler = dirtytime_interval_handler,
2083 .extra1 = SYSCTL_ZERO,
2084 },
2085 {
2086 .procname = "swappiness",
2087 .data = &vm_swappiness,
2088 .maxlen = sizeof(vm_swappiness),
2089 .mode = 0644,
2090 .proc_handler = proc_dointvec_minmax,
2091 .extra1 = SYSCTL_ZERO,
78e36f3b 2092 .extra2 = SYSCTL_TWO_HUNDRED,
f461d2dc
CH
2093 },
2094#ifdef CONFIG_HUGETLB_PAGE
2095 {
2096 .procname = "nr_hugepages",
2097 .data = NULL,
2098 .maxlen = sizeof(unsigned long),
2099 .mode = 0644,
2100 .proc_handler = hugetlb_sysctl_handler,
2101 },
2102#ifdef CONFIG_NUMA
2103 {
2104 .procname = "nr_hugepages_mempolicy",
2105 .data = NULL,
2106 .maxlen = sizeof(unsigned long),
2107 .mode = 0644,
2108 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
2109 },
2110 {
2111 .procname = "numa_stat",
2112 .data = &sysctl_vm_numa_stat,
2113 .maxlen = sizeof(int),
2114 .mode = 0644,
2115 .proc_handler = sysctl_vm_numa_stat_handler,
2116 .extra1 = SYSCTL_ZERO,
2117 .extra2 = SYSCTL_ONE,
2118 },
2119#endif
2120 {
2121 .procname = "hugetlb_shm_group",
2122 .data = &sysctl_hugetlb_shm_group,
2123 .maxlen = sizeof(gid_t),
2124 .mode = 0644,
2125 .proc_handler = proc_dointvec,
2126 },
2127 {
2128 .procname = "nr_overcommit_hugepages",
2129 .data = NULL,
2130 .maxlen = sizeof(unsigned long),
2131 .mode = 0644,
2132 .proc_handler = hugetlb_overcommit_handler,
2133 },
2134#endif
2135 {
2136 .procname = "lowmem_reserve_ratio",
2137 .data = &sysctl_lowmem_reserve_ratio,
2138 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
2139 .mode = 0644,
2140 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
2141 },
2142 {
2143 .procname = "drop_caches",
2144 .data = &sysctl_drop_caches,
2145 .maxlen = sizeof(int),
2146 .mode = 0200,
2147 .proc_handler = drop_caches_sysctl_handler,
2148 .extra1 = SYSCTL_ONE,
78e36f3b 2149 .extra2 = SYSCTL_FOUR,
f461d2dc
CH
2150 },
2151#ifdef CONFIG_COMPACTION
2152 {
2153 .procname = "compact_memory",
ef498438 2154 .data = NULL,
f461d2dc
CH
2155 .maxlen = sizeof(int),
2156 .mode = 0200,
2157 .proc_handler = sysctl_compaction_handler,
2158 },
facdaa91
NG
2159 {
2160 .procname = "compaction_proactiveness",
2161 .data = &sysctl_compaction_proactiveness,
d34c0a75 2162 .maxlen = sizeof(sysctl_compaction_proactiveness),
facdaa91 2163 .mode = 0644,
65d759c8 2164 .proc_handler = compaction_proactiveness_sysctl_handler,
facdaa91 2165 .extra1 = SYSCTL_ZERO,
78e36f3b 2166 .extra2 = SYSCTL_ONE_HUNDRED,
facdaa91 2167 },
f461d2dc
CH
2168 {
2169 .procname = "extfrag_threshold",
2170 .data = &sysctl_extfrag_threshold,
2171 .maxlen = sizeof(int),
2172 .mode = 0644,
2173 .proc_handler = proc_dointvec_minmax,
2452dcb9 2174 .extra1 = SYSCTL_ZERO,
d73840ec 2175 .extra2 = (void *)&max_extfrag_threshold,
f461d2dc
CH
2176 },
2177 {
2178 .procname = "compact_unevictable_allowed",
2179 .data = &sysctl_compact_unevictable_allowed,
2180 .maxlen = sizeof(int),
2181 .mode = 0644,
2182 .proc_handler = proc_dointvec_minmax_warn_RT_change,
2183 .extra1 = SYSCTL_ZERO,
2184 .extra2 = SYSCTL_ONE,
2185 },
1da177e4 2186
f461d2dc
CH
2187#endif /* CONFIG_COMPACTION */
2188 {
2189 .procname = "min_free_kbytes",
2190 .data = &min_free_kbytes,
2191 .maxlen = sizeof(min_free_kbytes),
2192 .mode = 0644,
2193 .proc_handler = min_free_kbytes_sysctl_handler,
2194 .extra1 = SYSCTL_ZERO,
2195 },
2196 {
2197 .procname = "watermark_boost_factor",
2198 .data = &watermark_boost_factor,
2199 .maxlen = sizeof(watermark_boost_factor),
2200 .mode = 0644,
2201 .proc_handler = proc_dointvec_minmax,
2202 .extra1 = SYSCTL_ZERO,
2203 },
2204 {
2205 .procname = "watermark_scale_factor",
2206 .data = &watermark_scale_factor,
2207 .maxlen = sizeof(watermark_scale_factor),
2208 .mode = 0644,
2209 .proc_handler = watermark_scale_factor_sysctl_handler,
2210 .extra1 = SYSCTL_ONE,
78e36f3b 2211 .extra2 = SYSCTL_THREE_THOUSAND,
f461d2dc
CH
2212 },
2213 {
74f44822
MG
2214 .procname = "percpu_pagelist_high_fraction",
2215 .data = &percpu_pagelist_high_fraction,
2216 .maxlen = sizeof(percpu_pagelist_high_fraction),
f461d2dc 2217 .mode = 0644,
74f44822 2218 .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
f461d2dc
CH
2219 .extra1 = SYSCTL_ZERO,
2220 },
5ef64cc8
LT
2221 {
2222 .procname = "page_lock_unfairness",
2223 .data = &sysctl_page_lock_unfairness,
2224 .maxlen = sizeof(sysctl_page_lock_unfairness),
2225 .mode = 0644,
2226 .proc_handler = proc_dointvec_minmax,
2227 .extra1 = SYSCTL_ZERO,
2228 },
f461d2dc
CH
2229#ifdef CONFIG_MMU
2230 {
2231 .procname = "max_map_count",
2232 .data = &sysctl_max_map_count,
2233 .maxlen = sizeof(sysctl_max_map_count),
2234 .mode = 0644,
2235 .proc_handler = proc_dointvec_minmax,
2236 .extra1 = SYSCTL_ZERO,
2237 },
2238#else
2239 {
2240 .procname = "nr_trim_pages",
2241 .data = &sysctl_nr_trim_pages,
2242 .maxlen = sizeof(sysctl_nr_trim_pages),
2243 .mode = 0644,
2244 .proc_handler = proc_dointvec_minmax,
2245 .extra1 = SYSCTL_ZERO,
2246 },
2247#endif
f461d2dc
CH
2248 {
2249 .procname = "vfs_cache_pressure",
2250 .data = &sysctl_vfs_cache_pressure,
2251 .maxlen = sizeof(sysctl_vfs_cache_pressure),
2252 .mode = 0644,
3b3376f2 2253 .proc_handler = proc_dointvec_minmax,
f461d2dc
CH
2254 .extra1 = SYSCTL_ZERO,
2255 },
2256#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2257 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2258 {
2259 .procname = "legacy_va_layout",
2260 .data = &sysctl_legacy_va_layout,
2261 .maxlen = sizeof(sysctl_legacy_va_layout),
2262 .mode = 0644,
3b3376f2 2263 .proc_handler = proc_dointvec_minmax,
f461d2dc
CH
2264 .extra1 = SYSCTL_ZERO,
2265 },
2266#endif
2267#ifdef CONFIG_NUMA
2268 {
2269 .procname = "zone_reclaim_mode",
2270 .data = &node_reclaim_mode,
2271 .maxlen = sizeof(node_reclaim_mode),
2272 .mode = 0644,
3b3376f2 2273 .proc_handler = proc_dointvec_minmax,
f461d2dc
CH
2274 .extra1 = SYSCTL_ZERO,
2275 },
2276 {
2277 .procname = "min_unmapped_ratio",
2278 .data = &sysctl_min_unmapped_ratio,
2279 .maxlen = sizeof(sysctl_min_unmapped_ratio),
2280 .mode = 0644,
2281 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
2282 .extra1 = SYSCTL_ZERO,
78e36f3b 2283 .extra2 = SYSCTL_ONE_HUNDRED,
f461d2dc
CH
2284 },
2285 {
2286 .procname = "min_slab_ratio",
2287 .data = &sysctl_min_slab_ratio,
2288 .maxlen = sizeof(sysctl_min_slab_ratio),
2289 .mode = 0644,
2290 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
2291 .extra1 = SYSCTL_ZERO,
78e36f3b 2292 .extra2 = SYSCTL_ONE_HUNDRED,
f461d2dc
CH
2293 },
2294#endif
2295#ifdef CONFIG_SMP
2296 {
2297 .procname = "stat_interval",
2298 .data = &sysctl_stat_interval,
2299 .maxlen = sizeof(sysctl_stat_interval),
2300 .mode = 0644,
2301 .proc_handler = proc_dointvec_jiffies,
2302 },
2303 {
2304 .procname = "stat_refresh",
2305 .data = NULL,
2306 .maxlen = 0,
2307 .mode = 0600,
2308 .proc_handler = vmstat_refresh,
2309 },
2310#endif
2311#ifdef CONFIG_MMU
2312 {
2313 .procname = "mmap_min_addr",
2314 .data = &dac_mmap_min_addr,
2315 .maxlen = sizeof(unsigned long),
2316 .mode = 0644,
2317 .proc_handler = mmap_min_addr_handler,
2318 },
2319#endif
2320#ifdef CONFIG_NUMA
2321 {
2322 .procname = "numa_zonelist_order",
2323 .data = &numa_zonelist_order,
2324 .maxlen = NUMA_ZONELIST_ORDER_LEN,
2325 .mode = 0644,
2326 .proc_handler = numa_zonelist_order_handler,
2327 },
2328#endif
2329#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2330 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2331 {
2332 .procname = "vdso_enabled",
2333#ifdef CONFIG_X86_32
2334 .data = &vdso32_enabled,
2335 .maxlen = sizeof(vdso32_enabled),
2336#else
2337 .data = &vdso_enabled,
2338 .maxlen = sizeof(vdso_enabled),
2339#endif
2340 .mode = 0644,
2341 .proc_handler = proc_dointvec,
2342 .extra1 = SYSCTL_ZERO,
2343 },
2344#endif
f461d2dc
CH
2345#ifdef CONFIG_MEMORY_FAILURE
2346 {
2347 .procname = "memory_failure_early_kill",
2348 .data = &sysctl_memory_failure_early_kill,
2349 .maxlen = sizeof(sysctl_memory_failure_early_kill),
2350 .mode = 0644,
2351 .proc_handler = proc_dointvec_minmax,
2352 .extra1 = SYSCTL_ZERO,
2353 .extra2 = SYSCTL_ONE,
2354 },
2355 {
2356 .procname = "memory_failure_recovery",
2357 .data = &sysctl_memory_failure_recovery,
2358 .maxlen = sizeof(sysctl_memory_failure_recovery),
2359 .mode = 0644,
2360 .proc_handler = proc_dointvec_minmax,
2361 .extra1 = SYSCTL_ZERO,
2362 .extra2 = SYSCTL_ONE,
2363 },
2364#endif
2365 {
2366 .procname = "user_reserve_kbytes",
2367 .data = &sysctl_user_reserve_kbytes,
2368 .maxlen = sizeof(sysctl_user_reserve_kbytes),
2369 .mode = 0644,
2370 .proc_handler = proc_doulongvec_minmax,
2371 },
2372 {
2373 .procname = "admin_reserve_kbytes",
2374 .data = &sysctl_admin_reserve_kbytes,
2375 .maxlen = sizeof(sysctl_admin_reserve_kbytes),
2376 .mode = 0644,
2377 .proc_handler = proc_doulongvec_minmax,
2378 },
2379#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2380 {
2381 .procname = "mmap_rnd_bits",
2382 .data = &mmap_rnd_bits,
2383 .maxlen = sizeof(mmap_rnd_bits),
2384 .mode = 0600,
2385 .proc_handler = proc_dointvec_minmax,
2386 .extra1 = (void *)&mmap_rnd_bits_min,
2387 .extra2 = (void *)&mmap_rnd_bits_max,
2388 },
2389#endif
2390#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2391 {
2392 .procname = "mmap_rnd_compat_bits",
2393 .data = &mmap_rnd_compat_bits,
2394 .maxlen = sizeof(mmap_rnd_compat_bits),
2395 .mode = 0600,
2396 .proc_handler = proc_dointvec_minmax,
2397 .extra1 = (void *)&mmap_rnd_compat_bits_min,
2398 .extra2 = (void *)&mmap_rnd_compat_bits_max,
2399 },
2400#endif
2401#ifdef CONFIG_USERFAULTFD
2402 {
2403 .procname = "unprivileged_userfaultfd",
2404 .data = &sysctl_unprivileged_userfaultfd,
2405 .maxlen = sizeof(sysctl_unprivileged_userfaultfd),
2406 .mode = 0644,
2407 .proc_handler = proc_dointvec_minmax,
2408 .extra1 = SYSCTL_ZERO,
2409 .extra2 = SYSCTL_ONE,
2410 },
2411#endif
2412 { }
2413};
1da177e4 2414
f461d2dc
CH
2415static struct ctl_table debug_table[] = {
2416#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2417 {
2418 .procname = "exception-trace",
2419 .data = &show_unhandled_signals,
2420 .maxlen = sizeof(int),
2421 .mode = 0644,
2422 .proc_handler = proc_dointvec
2423 },
f461d2dc
CH
2424#endif
2425 { }
2426};
1da177e4 2427
f461d2dc
CH
2428static struct ctl_table dev_table[] = {
2429 { }
2430};
1da177e4 2431
51cb8dfc
LC
/*
 * One base declaration per top-level table.  The first argument of each
 * line is the name later passed to register_sysctl_base() in
 * sysctl_init_bases(); the second is the ctl_table array it refers to.
 */
DECLARE_SYSCTL_BASE(kernel, kern_table);
DECLARE_SYSCTL_BASE(vm, vm_table);
DECLARE_SYSCTL_BASE(debug, debug_table);
DECLARE_SYSCTL_BASE(dev, dev_table);
1da177e4 2436
d8c0418a 2437int __init sysctl_init_bases(void)
492ecee8 2438{
51cb8dfc
LC
2439 register_sysctl_base(kernel);
2440 register_sysctl_base(vm);
51cb8dfc
LC
2441 register_sysctl_base(debug);
2442 register_sysctl_base(dev);
492ecee8 2443
f461d2dc 2444 return 0;
492ecee8 2445}
f461d2dc 2446#endif /* CONFIG_SYSCTL */
1da177e4
LT
/*
 * Exports for the proc_do*() handlers are collected here in one place
 * instead of following each symbol definition (exception granted :-).
 */

/* bool and int handlers */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* unsigned int handlers; the minmax variant is exported GPL-only */
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* unsigned long handlers */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* string and bitmap handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_do_large_bitmap);