Merge tag 'fuse-update-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi...
[sfrench/cifs-2.6.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69
70 #include "../lib/kstrtox.h"
71
72 #include <linux/uaccess.h>
73 #include <asm/processor.h>
74
75 #ifdef CONFIG_X86
76 #include <asm/nmi.h>
77 #include <asm/stacktrace.h>
78 #include <asm/io.h>
79 #endif
80 #ifdef CONFIG_SPARC
81 #include <asm/setup.h>
82 #endif
83 #ifdef CONFIG_BSD_PROCESS_ACCT
84 #include <linux/acct.h>
85 #endif
86 #ifdef CONFIG_RT_MUTEXES
87 #include <linux/rtmutex.h>
88 #endif
89 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
90 #include <linux/lockdep.h>
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 #include <scsi/sg.h>
94 #endif
95 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
96 #include <linux/stackleak.h>
97 #endif
98 #ifdef CONFIG_LOCKUP_DETECTOR
99 #include <linux/nmi.h>
100 #endif
101
102 #if defined(CONFIG_SYSCTL)
103
104 /* External variables not in a header file. */
105 extern int suid_dumpable;
106 #ifdef CONFIG_COREDUMP
107 extern int core_uses_pid;
108 extern char core_pattern[];
109 extern unsigned int core_pipe_limit;
110 #endif
111 extern int pid_max;
112 extern int pid_max_min, pid_max_max;
113 extern int percpu_pagelist_fraction;
114 extern int latencytop_enabled;
115 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
116 #ifndef CONFIG_MMU
117 extern int sysctl_nr_trim_pages;
118 #endif
119
120 /* Constants used for minimum and  maximum */
121 #ifdef CONFIG_LOCKUP_DETECTOR
122 static int sixty = 60;
123 #endif
124
125 static int __maybe_unused neg_one = -1;
126
127 static int zero;
128 static int __maybe_unused one = 1;
129 static int __maybe_unused two = 2;
130 static int __maybe_unused four = 4;
131 static unsigned long zero_ul;
132 static unsigned long one_ul = 1;
133 static unsigned long long_max = LONG_MAX;
134 static int one_hundred = 100;
135 static int one_thousand = 1000;
136 #ifdef CONFIG_PRINTK
137 static int ten_thousand = 10000;
138 #endif
139 #ifdef CONFIG_PERF_EVENTS
140 static int six_hundred_forty_kb = 640 * 1024;
141 #endif
142
143 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
144 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
145
146 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
147 static int maxolduid = 65535;
148 static int minolduid;
149
150 static int ngroups_max = NGROUPS_MAX;
151 static const int cap_last_cap = CAP_LAST_CAP;
152
/*
 * Required by the proc_doulongvec_minmax handling of both
 * sysctl_hung_task_timeout_secs and hung_task_check_interval_secs.
 */
#ifdef CONFIG_DETECT_HUNG_TASK
static unsigned long hung_task_timeout_max = LONG_MAX / HZ;
#endif
160
161 #ifdef CONFIG_INOTIFY_USER
162 #include <linux/inotify.h>
163 #endif
164 #ifdef CONFIG_SPARC
165 #endif
166
167 #ifdef __hppa__
168 extern int pwrsw_enabled;
169 #endif
170
171 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
172 extern int unaligned_enabled;
173 #endif
174
175 #ifdef CONFIG_IA64
176 extern int unaligned_dump_stack;
177 #endif
178
179 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
180 extern int no_unaligned_warning;
181 #endif
182
183 #ifdef CONFIG_PROC_SYSCTL
184
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete
 *	sysctl value, and repeated writes through the same file descriptor
 *	overwrite the value no matter what the file position is. A non-zero
 *	initial position does not trigger a warning.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, except that a warning
 *	is issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric sysctl entries may only be written at file
 *	position 0, and the buffer handed to the write syscall must hold the
 *	whole value. For string entries the file position is respected, but
 *	only up to the maximum length of the buffer; anything past the
 *	maximum length is ignored. Multiple writes append to the buffer.
 *
 * The write mode determines how the current file position influences the
 * way a sysctl value is updated on each write through the proc interface.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY	= -1,
	SYSCTL_WRITES_WARN	= 0,
	SYSCTL_WRITES_STRICT	= 1,
};

/* Write mode currently in force; starts out as the strict mode. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
211
212 static int proc_do_cad_pid(struct ctl_table *table, int write,
213                   void __user *buffer, size_t *lenp, loff_t *ppos);
214 static int proc_taint(struct ctl_table *table, int write,
215                                void __user *buffer, size_t *lenp, loff_t *ppos);
216 #endif
217
218 #ifdef CONFIG_PRINTK
219 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
220                                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #endif
222
223 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
224                 void __user *buffer, size_t *lenp, loff_t *ppos);
225 #ifdef CONFIG_COREDUMP
226 static int proc_dostring_coredump(struct ctl_table *table, int write,
227                 void __user *buffer, size_t *lenp, loff_t *ppos);
228 #endif
229 static int proc_dopipe_max_size(struct ctl_table *table, int write,
230                 void __user *buffer, size_t *lenp, loff_t *ppos);
231 #ifdef CONFIG_BPF_SYSCALL
232 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
233                                           void __user *buffer, size_t *lenp,
234                                           loff_t *ppos);
235 #endif
236
237 #ifdef CONFIG_MAGIC_SYSRQ
238 /* Note: sysrq code uses its own private copy */
239 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
240
241 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
242                                 void __user *buffer, size_t *lenp,
243                                 loff_t *ppos)
244 {
245         int error;
246
247         error = proc_dointvec(table, write, buffer, lenp, ppos);
248         if (error)
249                 return error;
250
251         if (write)
252                 sysrq_toggle_support(__sysrq_enabled);
253
254         return 0;
255 }
256
257 #endif
258
259 static struct ctl_table kern_table[];
260 static struct ctl_table vm_table[];
261 static struct ctl_table fs_table[];
262 static struct ctl_table debug_table[];
263 static struct ctl_table dev_table[];
264 extern struct ctl_table random_table[];
265 #ifdef CONFIG_EPOLL
266 extern struct ctl_table epoll_table[];
267 #endif
268
269 #ifdef CONFIG_FW_LOADER_USER_HELPER
270 extern struct ctl_table firmware_config_table[];
271 #endif
272
273 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
274 int sysctl_legacy_va_layout;
275 #endif
276
277 /* The default sysctl tables: */
278
279 static struct ctl_table sysctl_base_table[] = {
280         {
281                 .procname       = "kernel",
282                 .mode           = 0555,
283                 .child          = kern_table,
284         },
285         {
286                 .procname       = "vm",
287                 .mode           = 0555,
288                 .child          = vm_table,
289         },
290         {
291                 .procname       = "fs",
292                 .mode           = 0555,
293                 .child          = fs_table,
294         },
295         {
296                 .procname       = "debug",
297                 .mode           = 0555,
298                 .child          = debug_table,
299         },
300         {
301                 .procname       = "dev",
302                 .mode           = 0555,
303                 .child          = dev_table,
304         },
305         { }
306 };
307
#ifdef CONFIG_SCHED_DEBUG
/* Bounds for the scheduler granularity entries: 100 usecs up to 1 second. */
static int min_sched_granularity_ns = 100000;
static int max_sched_granularity_ns = NSEC_PER_SEC;

/* Bounds for the wakeup granularity entry: 0 usecs up to 1 second. */
static int min_wakeup_granularity_ns;
static int max_wakeup_granularity_ns = NSEC_PER_SEC;

#ifdef CONFIG_SMP
/* Bounds for sched_tunable_scaling: first to last valid enum value. */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
/* Bounds for the extfrag threshold: 0 up to 1000. */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
323
324 static struct ctl_table kern_table[] = {
325         {
326                 .procname       = "sched_child_runs_first",
327                 .data           = &sysctl_sched_child_runs_first,
328                 .maxlen         = sizeof(unsigned int),
329                 .mode           = 0644,
330                 .proc_handler   = proc_dointvec,
331         },
332 #ifdef CONFIG_SCHED_DEBUG
333         {
334                 .procname       = "sched_min_granularity_ns",
335                 .data           = &sysctl_sched_min_granularity,
336                 .maxlen         = sizeof(unsigned int),
337                 .mode           = 0644,
338                 .proc_handler   = sched_proc_update_handler,
339                 .extra1         = &min_sched_granularity_ns,
340                 .extra2         = &max_sched_granularity_ns,
341         },
342         {
343                 .procname       = "sched_latency_ns",
344                 .data           = &sysctl_sched_latency,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = sched_proc_update_handler,
348                 .extra1         = &min_sched_granularity_ns,
349                 .extra2         = &max_sched_granularity_ns,
350         },
351         {
352                 .procname       = "sched_wakeup_granularity_ns",
353                 .data           = &sysctl_sched_wakeup_granularity,
354                 .maxlen         = sizeof(unsigned int),
355                 .mode           = 0644,
356                 .proc_handler   = sched_proc_update_handler,
357                 .extra1         = &min_wakeup_granularity_ns,
358                 .extra2         = &max_wakeup_granularity_ns,
359         },
360 #ifdef CONFIG_SMP
361         {
362                 .procname       = "sched_tunable_scaling",
363                 .data           = &sysctl_sched_tunable_scaling,
364                 .maxlen         = sizeof(enum sched_tunable_scaling),
365                 .mode           = 0644,
366                 .proc_handler   = sched_proc_update_handler,
367                 .extra1         = &min_sched_tunable_scaling,
368                 .extra2         = &max_sched_tunable_scaling,
369         },
370         {
371                 .procname       = "sched_migration_cost_ns",
372                 .data           = &sysctl_sched_migration_cost,
373                 .maxlen         = sizeof(unsigned int),
374                 .mode           = 0644,
375                 .proc_handler   = proc_dointvec,
376         },
377         {
378                 .procname       = "sched_nr_migrate",
379                 .data           = &sysctl_sched_nr_migrate,
380                 .maxlen         = sizeof(unsigned int),
381                 .mode           = 0644,
382                 .proc_handler   = proc_dointvec,
383         },
384 #ifdef CONFIG_SCHEDSTATS
385         {
386                 .procname       = "sched_schedstats",
387                 .data           = NULL,
388                 .maxlen         = sizeof(unsigned int),
389                 .mode           = 0644,
390                 .proc_handler   = sysctl_schedstats,
391                 .extra1         = &zero,
392                 .extra2         = &one,
393         },
394 #endif /* CONFIG_SCHEDSTATS */
395 #endif /* CONFIG_SMP */
396 #ifdef CONFIG_NUMA_BALANCING
397         {
398                 .procname       = "numa_balancing_scan_delay_ms",
399                 .data           = &sysctl_numa_balancing_scan_delay,
400                 .maxlen         = sizeof(unsigned int),
401                 .mode           = 0644,
402                 .proc_handler   = proc_dointvec,
403         },
404         {
405                 .procname       = "numa_balancing_scan_period_min_ms",
406                 .data           = &sysctl_numa_balancing_scan_period_min,
407                 .maxlen         = sizeof(unsigned int),
408                 .mode           = 0644,
409                 .proc_handler   = proc_dointvec,
410         },
411         {
412                 .procname       = "numa_balancing_scan_period_max_ms",
413                 .data           = &sysctl_numa_balancing_scan_period_max,
414                 .maxlen         = sizeof(unsigned int),
415                 .mode           = 0644,
416                 .proc_handler   = proc_dointvec,
417         },
418         {
419                 .procname       = "numa_balancing_scan_size_mb",
420                 .data           = &sysctl_numa_balancing_scan_size,
421                 .maxlen         = sizeof(unsigned int),
422                 .mode           = 0644,
423                 .proc_handler   = proc_dointvec_minmax,
424                 .extra1         = &one,
425         },
426         {
427                 .procname       = "numa_balancing",
428                 .data           = NULL, /* filled in by handler */
429                 .maxlen         = sizeof(unsigned int),
430                 .mode           = 0644,
431                 .proc_handler   = sysctl_numa_balancing,
432                 .extra1         = &zero,
433                 .extra2         = &one,
434         },
435 #endif /* CONFIG_NUMA_BALANCING */
436 #endif /* CONFIG_SCHED_DEBUG */
437         {
438                 .procname       = "sched_rt_period_us",
439                 .data           = &sysctl_sched_rt_period,
440                 .maxlen         = sizeof(unsigned int),
441                 .mode           = 0644,
442                 .proc_handler   = sched_rt_handler,
443         },
444         {
445                 .procname       = "sched_rt_runtime_us",
446                 .data           = &sysctl_sched_rt_runtime,
447                 .maxlen         = sizeof(int),
448                 .mode           = 0644,
449                 .proc_handler   = sched_rt_handler,
450         },
451         {
452                 .procname       = "sched_rr_timeslice_ms",
453                 .data           = &sysctl_sched_rr_timeslice,
454                 .maxlen         = sizeof(int),
455                 .mode           = 0644,
456                 .proc_handler   = sched_rr_handler,
457         },
458 #ifdef CONFIG_SCHED_AUTOGROUP
459         {
460                 .procname       = "sched_autogroup_enabled",
461                 .data           = &sysctl_sched_autogroup_enabled,
462                 .maxlen         = sizeof(unsigned int),
463                 .mode           = 0644,
464                 .proc_handler   = proc_dointvec_minmax,
465                 .extra1         = &zero,
466                 .extra2         = &one,
467         },
468 #endif
469 #ifdef CONFIG_CFS_BANDWIDTH
470         {
471                 .procname       = "sched_cfs_bandwidth_slice_us",
472                 .data           = &sysctl_sched_cfs_bandwidth_slice,
473                 .maxlen         = sizeof(unsigned int),
474                 .mode           = 0644,
475                 .proc_handler   = proc_dointvec_minmax,
476                 .extra1         = &one,
477         },
478 #endif
479 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
480         {
481                 .procname       = "sched_energy_aware",
482                 .data           = &sysctl_sched_energy_aware,
483                 .maxlen         = sizeof(unsigned int),
484                 .mode           = 0644,
485                 .proc_handler   = sched_energy_aware_handler,
486                 .extra1         = &zero,
487                 .extra2         = &one,
488         },
489 #endif
490 #ifdef CONFIG_PROVE_LOCKING
491         {
492                 .procname       = "prove_locking",
493                 .data           = &prove_locking,
494                 .maxlen         = sizeof(int),
495                 .mode           = 0644,
496                 .proc_handler   = proc_dointvec,
497         },
498 #endif
499 #ifdef CONFIG_LOCK_STAT
500         {
501                 .procname       = "lock_stat",
502                 .data           = &lock_stat,
503                 .maxlen         = sizeof(int),
504                 .mode           = 0644,
505                 .proc_handler   = proc_dointvec,
506         },
507 #endif
508         {
509                 .procname       = "panic",
510                 .data           = &panic_timeout,
511                 .maxlen         = sizeof(int),
512                 .mode           = 0644,
513                 .proc_handler   = proc_dointvec,
514         },
515 #ifdef CONFIG_COREDUMP
516         {
517                 .procname       = "core_uses_pid",
518                 .data           = &core_uses_pid,
519                 .maxlen         = sizeof(int),
520                 .mode           = 0644,
521                 .proc_handler   = proc_dointvec,
522         },
523         {
524                 .procname       = "core_pattern",
525                 .data           = core_pattern,
526                 .maxlen         = CORENAME_MAX_SIZE,
527                 .mode           = 0644,
528                 .proc_handler   = proc_dostring_coredump,
529         },
530         {
531                 .procname       = "core_pipe_limit",
532                 .data           = &core_pipe_limit,
533                 .maxlen         = sizeof(unsigned int),
534                 .mode           = 0644,
535                 .proc_handler   = proc_dointvec,
536         },
537 #endif
538 #ifdef CONFIG_PROC_SYSCTL
539         {
540                 .procname       = "tainted",
541                 .maxlen         = sizeof(long),
542                 .mode           = 0644,
543                 .proc_handler   = proc_taint,
544         },
545         {
546                 .procname       = "sysctl_writes_strict",
547                 .data           = &sysctl_writes_strict,
548                 .maxlen         = sizeof(int),
549                 .mode           = 0644,
550                 .proc_handler   = proc_dointvec_minmax,
551                 .extra1         = &neg_one,
552                 .extra2         = &one,
553         },
554 #endif
555 #ifdef CONFIG_LATENCYTOP
556         {
557                 .procname       = "latencytop",
558                 .data           = &latencytop_enabled,
559                 .maxlen         = sizeof(int),
560                 .mode           = 0644,
561                 .proc_handler   = sysctl_latencytop,
562         },
563 #endif
564 #ifdef CONFIG_BLK_DEV_INITRD
565         {
566                 .procname       = "real-root-dev",
567                 .data           = &real_root_dev,
568                 .maxlen         = sizeof(int),
569                 .mode           = 0644,
570                 .proc_handler   = proc_dointvec,
571         },
572 #endif
573         {
574                 .procname       = "print-fatal-signals",
575                 .data           = &print_fatal_signals,
576                 .maxlen         = sizeof(int),
577                 .mode           = 0644,
578                 .proc_handler   = proc_dointvec,
579         },
580 #ifdef CONFIG_SPARC
581         {
582                 .procname       = "reboot-cmd",
583                 .data           = reboot_command,
584                 .maxlen         = 256,
585                 .mode           = 0644,
586                 .proc_handler   = proc_dostring,
587         },
588         {
589                 .procname       = "stop-a",
590                 .data           = &stop_a_enabled,
591                 .maxlen         = sizeof (int),
592                 .mode           = 0644,
593                 .proc_handler   = proc_dointvec,
594         },
595         {
596                 .procname       = "scons-poweroff",
597                 .data           = &scons_pwroff,
598                 .maxlen         = sizeof (int),
599                 .mode           = 0644,
600                 .proc_handler   = proc_dointvec,
601         },
602 #endif
603 #ifdef CONFIG_SPARC64
604         {
605                 .procname       = "tsb-ratio",
606                 .data           = &sysctl_tsb_ratio,
607                 .maxlen         = sizeof (int),
608                 .mode           = 0644,
609                 .proc_handler   = proc_dointvec,
610         },
611 #endif
612 #ifdef __hppa__
613         {
614                 .procname       = "soft-power",
615                 .data           = &pwrsw_enabled,
616                 .maxlen         = sizeof (int),
617                 .mode           = 0644,
618                 .proc_handler   = proc_dointvec,
619         },
620 #endif
621 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
622         {
623                 .procname       = "unaligned-trap",
624                 .data           = &unaligned_enabled,
625                 .maxlen         = sizeof (int),
626                 .mode           = 0644,
627                 .proc_handler   = proc_dointvec,
628         },
629 #endif
630         {
631                 .procname       = "ctrl-alt-del",
632                 .data           = &C_A_D,
633                 .maxlen         = sizeof(int),
634                 .mode           = 0644,
635                 .proc_handler   = proc_dointvec,
636         },
637 #ifdef CONFIG_FUNCTION_TRACER
638         {
639                 .procname       = "ftrace_enabled",
640                 .data           = &ftrace_enabled,
641                 .maxlen         = sizeof(int),
642                 .mode           = 0644,
643                 .proc_handler   = ftrace_enable_sysctl,
644         },
645 #endif
646 #ifdef CONFIG_STACK_TRACER
647         {
648                 .procname       = "stack_tracer_enabled",
649                 .data           = &stack_tracer_enabled,
650                 .maxlen         = sizeof(int),
651                 .mode           = 0644,
652                 .proc_handler   = stack_trace_sysctl,
653         },
654 #endif
655 #ifdef CONFIG_TRACING
656         {
657                 .procname       = "ftrace_dump_on_oops",
658                 .data           = &ftrace_dump_on_oops,
659                 .maxlen         = sizeof(int),
660                 .mode           = 0644,
661                 .proc_handler   = proc_dointvec,
662         },
663         {
664                 .procname       = "traceoff_on_warning",
665                 .data           = &__disable_trace_on_warning,
666                 .maxlen         = sizeof(__disable_trace_on_warning),
667                 .mode           = 0644,
668                 .proc_handler   = proc_dointvec,
669         },
670         {
671                 .procname       = "tracepoint_printk",
672                 .data           = &tracepoint_printk,
673                 .maxlen         = sizeof(tracepoint_printk),
674                 .mode           = 0644,
675                 .proc_handler   = tracepoint_printk_sysctl,
676         },
677 #endif
678 #ifdef CONFIG_KEXEC_CORE
679         {
680                 .procname       = "kexec_load_disabled",
681                 .data           = &kexec_load_disabled,
682                 .maxlen         = sizeof(int),
683                 .mode           = 0644,
684                 /* only handle a transition from default "0" to "1" */
685                 .proc_handler   = proc_dointvec_minmax,
686                 .extra1         = &one,
687                 .extra2         = &one,
688         },
689 #endif
690 #ifdef CONFIG_MODULES
691         {
692                 .procname       = "modprobe",
693                 .data           = &modprobe_path,
694                 .maxlen         = KMOD_PATH_LEN,
695                 .mode           = 0644,
696                 .proc_handler   = proc_dostring,
697         },
698         {
699                 .procname       = "modules_disabled",
700                 .data           = &modules_disabled,
701                 .maxlen         = sizeof(int),
702                 .mode           = 0644,
703                 /* only handle a transition from default "0" to "1" */
704                 .proc_handler   = proc_dointvec_minmax,
705                 .extra1         = &one,
706                 .extra2         = &one,
707         },
708 #endif
709 #ifdef CONFIG_UEVENT_HELPER
710         {
711                 .procname       = "hotplug",
712                 .data           = &uevent_helper,
713                 .maxlen         = UEVENT_HELPER_PATH_LEN,
714                 .mode           = 0644,
715                 .proc_handler   = proc_dostring,
716         },
717 #endif
718 #ifdef CONFIG_CHR_DEV_SG
719         {
720                 .procname       = "sg-big-buff",
721                 .data           = &sg_big_buff,
722                 .maxlen         = sizeof (int),
723                 .mode           = 0444,
724                 .proc_handler   = proc_dointvec,
725         },
726 #endif
727 #ifdef CONFIG_BSD_PROCESS_ACCT
728         {
729                 .procname       = "acct",
730                 .data           = &acct_parm,
731                 .maxlen         = 3*sizeof(int),
732                 .mode           = 0644,
733                 .proc_handler   = proc_dointvec,
734         },
735 #endif
736 #ifdef CONFIG_MAGIC_SYSRQ
737         {
738                 .procname       = "sysrq",
739                 .data           = &__sysrq_enabled,
740                 .maxlen         = sizeof (int),
741                 .mode           = 0644,
742                 .proc_handler   = sysrq_sysctl_handler,
743         },
744 #endif
745 #ifdef CONFIG_PROC_SYSCTL
746         {
747                 .procname       = "cad_pid",
748                 .data           = NULL,
749                 .maxlen         = sizeof (int),
750                 .mode           = 0600,
751                 .proc_handler   = proc_do_cad_pid,
752         },
753 #endif
754         {
755                 .procname       = "threads-max",
756                 .data           = NULL,
757                 .maxlen         = sizeof(int),
758                 .mode           = 0644,
759                 .proc_handler   = sysctl_max_threads,
760         },
761         {
762                 .procname       = "random",
763                 .mode           = 0555,
764                 .child          = random_table,
765         },
766         {
767                 .procname       = "usermodehelper",
768                 .mode           = 0555,
769                 .child          = usermodehelper_table,
770         },
771 #ifdef CONFIG_FW_LOADER_USER_HELPER
772         {
773                 .procname       = "firmware_config",
774                 .mode           = 0555,
775                 .child          = firmware_config_table,
776         },
777 #endif
778         {
779                 .procname       = "overflowuid",
780                 .data           = &overflowuid,
781                 .maxlen         = sizeof(int),
782                 .mode           = 0644,
783                 .proc_handler   = proc_dointvec_minmax,
784                 .extra1         = &minolduid,
785                 .extra2         = &maxolduid,
786         },
787         {
788                 .procname       = "overflowgid",
789                 .data           = &overflowgid,
790                 .maxlen         = sizeof(int),
791                 .mode           = 0644,
792                 .proc_handler   = proc_dointvec_minmax,
793                 .extra1         = &minolduid,
794                 .extra2         = &maxolduid,
795         },
796 #ifdef CONFIG_S390
797 #ifdef CONFIG_MATHEMU
798         {
799                 .procname       = "ieee_emulation_warnings",
800                 .data           = &sysctl_ieee_emulation_warnings,
801                 .maxlen         = sizeof(int),
802                 .mode           = 0644,
803                 .proc_handler   = proc_dointvec,
804         },
805 #endif
806         {
807                 .procname       = "userprocess_debug",
808                 .data           = &show_unhandled_signals,
809                 .maxlen         = sizeof(int),
810                 .mode           = 0644,
811                 .proc_handler   = proc_dointvec,
812         },
813 #endif
814         {
815                 .procname       = "pid_max",
816                 .data           = &pid_max,
817                 .maxlen         = sizeof (int),
818                 .mode           = 0644,
819                 .proc_handler   = proc_dointvec_minmax,
820                 .extra1         = &pid_max_min,
821                 .extra2         = &pid_max_max,
822         },
823         {
824                 .procname       = "panic_on_oops",
825                 .data           = &panic_on_oops,
826                 .maxlen         = sizeof(int),
827                 .mode           = 0644,
828                 .proc_handler   = proc_dointvec,
829         },
830         {
831                 .procname       = "panic_print",
832                 .data           = &panic_print,
833                 .maxlen         = sizeof(unsigned long),
834                 .mode           = 0644,
835                 .proc_handler   = proc_doulongvec_minmax,
836         },
837 #if defined CONFIG_PRINTK
838         {
839                 .procname       = "printk",
840                 .data           = &console_loglevel,
841                 .maxlen         = 4*sizeof(int),
842                 .mode           = 0644,
843                 .proc_handler   = proc_dointvec,
844         },
845         {
846                 .procname       = "printk_ratelimit",
847                 .data           = &printk_ratelimit_state.interval,
848                 .maxlen         = sizeof(int),
849                 .mode           = 0644,
850                 .proc_handler   = proc_dointvec_jiffies,
851         },
852         {
853                 .procname       = "printk_ratelimit_burst",
854                 .data           = &printk_ratelimit_state.burst,
855                 .maxlen         = sizeof(int),
856                 .mode           = 0644,
857                 .proc_handler   = proc_dointvec,
858         },
859         {
860                 .procname       = "printk_delay",
861                 .data           = &printk_delay_msec,
862                 .maxlen         = sizeof(int),
863                 .mode           = 0644,
864                 .proc_handler   = proc_dointvec_minmax,
865                 .extra1         = &zero,
866                 .extra2         = &ten_thousand,
867         },
868         {
869                 .procname       = "printk_devkmsg",
870                 .data           = devkmsg_log_str,
871                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
872                 .mode           = 0644,
873                 .proc_handler   = devkmsg_sysctl_set_loglvl,
874         },
875         {
876                 .procname       = "dmesg_restrict",
877                 .data           = &dmesg_restrict,
878                 .maxlen         = sizeof(int),
879                 .mode           = 0644,
880                 .proc_handler   = proc_dointvec_minmax_sysadmin,
881                 .extra1         = &zero,
882                 .extra2         = &one,
883         },
884         {
885                 .procname       = "kptr_restrict",
886                 .data           = &kptr_restrict,
887                 .maxlen         = sizeof(int),
888                 .mode           = 0644,
889                 .proc_handler   = proc_dointvec_minmax_sysadmin,
890                 .extra1         = &zero,
891                 .extra2         = &two,
892         },
893 #endif
894         {
895                 .procname       = "ngroups_max",
896                 .data           = &ngroups_max,
897                 .maxlen         = sizeof (int),
898                 .mode           = 0444,
899                 .proc_handler   = proc_dointvec,
900         },
901         {
902                 .procname       = "cap_last_cap",
903                 .data           = (void *)&cap_last_cap,
904                 .maxlen         = sizeof(int),
905                 .mode           = 0444,
906                 .proc_handler   = proc_dointvec,
907         },
908 #if defined(CONFIG_LOCKUP_DETECTOR)
909         {
910                 .procname       = "watchdog",
911                 .data           = &watchdog_user_enabled,
912                 .maxlen         = sizeof(int),
913                 .mode           = 0644,
914                 .proc_handler   = proc_watchdog,
915                 .extra1         = &zero,
916                 .extra2         = &one,
917         },
918         {
919                 .procname       = "watchdog_thresh",
920                 .data           = &watchdog_thresh,
921                 .maxlen         = sizeof(int),
922                 .mode           = 0644,
923                 .proc_handler   = proc_watchdog_thresh,
924                 .extra1         = &zero,
925                 .extra2         = &sixty,
926         },
927         {
928                 .procname       = "nmi_watchdog",
929                 .data           = &nmi_watchdog_user_enabled,
930                 .maxlen         = sizeof(int),
931                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
932                 .proc_handler   = proc_nmi_watchdog,
933                 .extra1         = &zero,
934                 .extra2         = &one,
935         },
936         {
937                 .procname       = "watchdog_cpumask",
938                 .data           = &watchdog_cpumask_bits,
939                 .maxlen         = NR_CPUS,
940                 .mode           = 0644,
941                 .proc_handler   = proc_watchdog_cpumask,
942         },
943 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
944         {
945                 .procname       = "soft_watchdog",
946                 .data           = &soft_watchdog_user_enabled,
947                 .maxlen         = sizeof(int),
948                 .mode           = 0644,
949                 .proc_handler   = proc_soft_watchdog,
950                 .extra1         = &zero,
951                 .extra2         = &one,
952         },
953         {
954                 .procname       = "softlockup_panic",
955                 .data           = &softlockup_panic,
956                 .maxlen         = sizeof(int),
957                 .mode           = 0644,
958                 .proc_handler   = proc_dointvec_minmax,
959                 .extra1         = &zero,
960                 .extra2         = &one,
961         },
962 #ifdef CONFIG_SMP
963         {
964                 .procname       = "softlockup_all_cpu_backtrace",
965                 .data           = &sysctl_softlockup_all_cpu_backtrace,
966                 .maxlen         = sizeof(int),
967                 .mode           = 0644,
968                 .proc_handler   = proc_dointvec_minmax,
969                 .extra1         = &zero,
970                 .extra2         = &one,
971         },
972 #endif /* CONFIG_SMP */
973 #endif
974 #ifdef CONFIG_HARDLOCKUP_DETECTOR
975         {
976                 .procname       = "hardlockup_panic",
977                 .data           = &hardlockup_panic,
978                 .maxlen         = sizeof(int),
979                 .mode           = 0644,
980                 .proc_handler   = proc_dointvec_minmax,
981                 .extra1         = &zero,
982                 .extra2         = &one,
983         },
984 #ifdef CONFIG_SMP
985         {
986                 .procname       = "hardlockup_all_cpu_backtrace",
987                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
988                 .maxlen         = sizeof(int),
989                 .mode           = 0644,
990                 .proc_handler   = proc_dointvec_minmax,
991                 .extra1         = &zero,
992                 .extra2         = &one,
993         },
994 #endif /* CONFIG_SMP */
995 #endif
996 #endif
997
998 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
999         {
1000                 .procname       = "unknown_nmi_panic",
1001                 .data           = &unknown_nmi_panic,
1002                 .maxlen         = sizeof (int),
1003                 .mode           = 0644,
1004                 .proc_handler   = proc_dointvec,
1005         },
1006 #endif
1007 #if defined(CONFIG_X86)
1008         {
1009                 .procname       = "panic_on_unrecovered_nmi",
1010                 .data           = &panic_on_unrecovered_nmi,
1011                 .maxlen         = sizeof(int),
1012                 .mode           = 0644,
1013                 .proc_handler   = proc_dointvec,
1014         },
1015         {
1016                 .procname       = "panic_on_io_nmi",
1017                 .data           = &panic_on_io_nmi,
1018                 .maxlen         = sizeof(int),
1019                 .mode           = 0644,
1020                 .proc_handler   = proc_dointvec,
1021         },
1022 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1023         {
1024                 .procname       = "panic_on_stackoverflow",
1025                 .data           = &sysctl_panic_on_stackoverflow,
1026                 .maxlen         = sizeof(int),
1027                 .mode           = 0644,
1028                 .proc_handler   = proc_dointvec,
1029         },
1030 #endif
1031         {
1032                 .procname       = "bootloader_type",
1033                 .data           = &bootloader_type,
1034                 .maxlen         = sizeof (int),
1035                 .mode           = 0444,
1036                 .proc_handler   = proc_dointvec,
1037         },
1038         {
1039                 .procname       = "bootloader_version",
1040                 .data           = &bootloader_version,
1041                 .maxlen         = sizeof (int),
1042                 .mode           = 0444,
1043                 .proc_handler   = proc_dointvec,
1044         },
1045         {
1046                 .procname       = "io_delay_type",
1047                 .data           = &io_delay_type,
1048                 .maxlen         = sizeof(int),
1049                 .mode           = 0644,
1050                 .proc_handler   = proc_dointvec,
1051         },
1052 #endif
1053 #if defined(CONFIG_MMU)
1054         {
1055                 .procname       = "randomize_va_space",
1056                 .data           = &randomize_va_space,
1057                 .maxlen         = sizeof(int),
1058                 .mode           = 0644,
1059                 .proc_handler   = proc_dointvec,
1060         },
1061 #endif
1062 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1063         {
1064                 .procname       = "spin_retry",
1065                 .data           = &spin_retry,
1066                 .maxlen         = sizeof (int),
1067                 .mode           = 0644,
1068                 .proc_handler   = proc_dointvec,
1069         },
1070 #endif
1071 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1072         {
1073                 .procname       = "acpi_video_flags",
1074                 .data           = &acpi_realmode_flags,
1075                 .maxlen         = sizeof (unsigned long),
1076                 .mode           = 0644,
1077                 .proc_handler   = proc_doulongvec_minmax,
1078         },
1079 #endif
1080 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1081         {
1082                 .procname       = "ignore-unaligned-usertrap",
1083                 .data           = &no_unaligned_warning,
1084                 .maxlen         = sizeof (int),
1085                 .mode           = 0644,
1086                 .proc_handler   = proc_dointvec,
1087         },
1088 #endif
1089 #ifdef CONFIG_IA64
1090         {
1091                 .procname       = "unaligned-dump-stack",
1092                 .data           = &unaligned_dump_stack,
1093                 .maxlen         = sizeof (int),
1094                 .mode           = 0644,
1095                 .proc_handler   = proc_dointvec,
1096         },
1097 #endif
1098 #ifdef CONFIG_DETECT_HUNG_TASK
1099         {
1100                 .procname       = "hung_task_panic",
1101                 .data           = &sysctl_hung_task_panic,
1102                 .maxlen         = sizeof(int),
1103                 .mode           = 0644,
1104                 .proc_handler   = proc_dointvec_minmax,
1105                 .extra1         = &zero,
1106                 .extra2         = &one,
1107         },
1108         {
1109                 .procname       = "hung_task_check_count",
1110                 .data           = &sysctl_hung_task_check_count,
1111                 .maxlen         = sizeof(int),
1112                 .mode           = 0644,
1113                 .proc_handler   = proc_dointvec_minmax,
1114                 .extra1         = &zero,
1115         },
1116         {
1117                 .procname       = "hung_task_timeout_secs",
1118                 .data           = &sysctl_hung_task_timeout_secs,
1119                 .maxlen         = sizeof(unsigned long),
1120                 .mode           = 0644,
1121                 .proc_handler   = proc_dohung_task_timeout_secs,
1122                 .extra2         = &hung_task_timeout_max,
1123         },
1124         {
1125                 .procname       = "hung_task_check_interval_secs",
1126                 .data           = &sysctl_hung_task_check_interval_secs,
1127                 .maxlen         = sizeof(unsigned long),
1128                 .mode           = 0644,
1129                 .proc_handler   = proc_dohung_task_timeout_secs,
1130                 .extra2         = &hung_task_timeout_max,
1131         },
1132         {
1133                 .procname       = "hung_task_warnings",
1134                 .data           = &sysctl_hung_task_warnings,
1135                 .maxlen         = sizeof(int),
1136                 .mode           = 0644,
1137                 .proc_handler   = proc_dointvec_minmax,
1138                 .extra1         = &neg_one,
1139         },
1140 #endif
1141 #ifdef CONFIG_RT_MUTEXES
1142         {
1143                 .procname       = "max_lock_depth",
1144                 .data           = &max_lock_depth,
1145                 .maxlen         = sizeof(int),
1146                 .mode           = 0644,
1147                 .proc_handler   = proc_dointvec,
1148         },
1149 #endif
1150         {
1151                 .procname       = "poweroff_cmd",
1152                 .data           = &poweroff_cmd,
1153                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1154                 .mode           = 0644,
1155                 .proc_handler   = proc_dostring,
1156         },
1157 #ifdef CONFIG_KEYS
1158         {
1159                 .procname       = "keys",
1160                 .mode           = 0555,
1161                 .child          = key_sysctls,
1162         },
1163 #endif
1164 #ifdef CONFIG_PERF_EVENTS
1165         /*
1166          * User-space scripts rely on the existence of this file
1167          * as a feature check for perf_events being enabled.
1168          *
1169          * So it's an ABI, do not remove!
1170          */
1171         {
1172                 .procname       = "perf_event_paranoid",
1173                 .data           = &sysctl_perf_event_paranoid,
1174                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1175                 .mode           = 0644,
1176                 .proc_handler   = proc_dointvec,
1177         },
1178         {
1179                 .procname       = "perf_event_mlock_kb",
1180                 .data           = &sysctl_perf_event_mlock,
1181                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1182                 .mode           = 0644,
1183                 .proc_handler   = proc_dointvec,
1184         },
1185         {
1186                 .procname       = "perf_event_max_sample_rate",
1187                 .data           = &sysctl_perf_event_sample_rate,
1188                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1189                 .mode           = 0644,
1190                 .proc_handler   = perf_proc_update_handler,
1191                 .extra1         = &one,
1192         },
1193         {
1194                 .procname       = "perf_cpu_time_max_percent",
1195                 .data           = &sysctl_perf_cpu_time_max_percent,
1196                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1197                 .mode           = 0644,
1198                 .proc_handler   = perf_cpu_time_max_percent_handler,
1199                 .extra1         = &zero,
1200                 .extra2         = &one_hundred,
1201         },
1202         {
1203                 .procname       = "perf_event_max_stack",
1204                 .data           = &sysctl_perf_event_max_stack,
1205                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1206                 .mode           = 0644,
1207                 .proc_handler   = perf_event_max_stack_handler,
1208                 .extra1         = &zero,
1209                 .extra2         = &six_hundred_forty_kb,
1210         },
1211         {
1212                 .procname       = "perf_event_max_contexts_per_stack",
1213                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1214                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1215                 .mode           = 0644,
1216                 .proc_handler   = perf_event_max_stack_handler,
1217                 .extra1         = &zero,
1218                 .extra2         = &one_thousand,
1219         },
1220 #endif
1221         {
1222                 .procname       = "panic_on_warn",
1223                 .data           = &panic_on_warn,
1224                 .maxlen         = sizeof(int),
1225                 .mode           = 0644,
1226                 .proc_handler   = proc_dointvec_minmax,
1227                 .extra1         = &zero,
1228                 .extra2         = &one,
1229         },
1230 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1231         {
1232                 .procname       = "timer_migration",
1233                 .data           = &sysctl_timer_migration,
1234                 .maxlen         = sizeof(unsigned int),
1235                 .mode           = 0644,
1236                 .proc_handler   = timer_migration_handler,
1237                 .extra1         = &zero,
1238                 .extra2         = &one,
1239         },
1240 #endif
1241 #ifdef CONFIG_BPF_SYSCALL
1242         {
1243                 .procname       = "unprivileged_bpf_disabled",
1244                 .data           = &sysctl_unprivileged_bpf_disabled,
1245                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1246                 .mode           = 0644,
1247                 /* only handle a transition from default "0" to "1" */
1248                 .proc_handler   = proc_dointvec_minmax,
1249                 .extra1         = &one,
1250                 .extra2         = &one,
1251         },
1252         {
1253                 .procname       = "bpf_stats_enabled",
1254                 .data           = &sysctl_bpf_stats_enabled,
1255                 .maxlen         = sizeof(sysctl_bpf_stats_enabled),
1256                 .mode           = 0644,
1257                 .proc_handler   = proc_dointvec_minmax_bpf_stats,
1258                 .extra1         = &zero,
1259                 .extra2         = &one,
1260         },
1261 #endif
1262 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1263         {
1264                 .procname       = "panic_on_rcu_stall",
1265                 .data           = &sysctl_panic_on_rcu_stall,
1266                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1267                 .mode           = 0644,
1268                 .proc_handler   = proc_dointvec_minmax,
1269                 .extra1         = &zero,
1270                 .extra2         = &one,
1271         },
1272 #endif
1273 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1274         {
1275                 .procname       = "stack_erasing",
1276                 .data           = NULL,
1277                 .maxlen         = sizeof(int),
1278                 .mode           = 0600,
1279                 .proc_handler   = stack_erasing_sysctl,
1280                 .extra1         = &zero,
1281                 .extra2         = &one,
1282         },
1283 #endif
1284         { }
1285 };
1286
1287 static struct ctl_table vm_table[] = {
1288         {
1289                 .procname       = "overcommit_memory",
1290                 .data           = &sysctl_overcommit_memory,
1291                 .maxlen         = sizeof(sysctl_overcommit_memory),
1292                 .mode           = 0644,
1293                 .proc_handler   = proc_dointvec_minmax,
1294                 .extra1         = &zero,
1295                 .extra2         = &two,
1296         },
1297         {
1298                 .procname       = "panic_on_oom",
1299                 .data           = &sysctl_panic_on_oom,
1300                 .maxlen         = sizeof(sysctl_panic_on_oom),
1301                 .mode           = 0644,
1302                 .proc_handler   = proc_dointvec_minmax,
1303                 .extra1         = &zero,
1304                 .extra2         = &two,
1305         },
1306         {
1307                 .procname       = "oom_kill_allocating_task",
1308                 .data           = &sysctl_oom_kill_allocating_task,
1309                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1310                 .mode           = 0644,
1311                 .proc_handler   = proc_dointvec,
1312         },
1313         {
1314                 .procname       = "oom_dump_tasks",
1315                 .data           = &sysctl_oom_dump_tasks,
1316                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1317                 .mode           = 0644,
1318                 .proc_handler   = proc_dointvec,
1319         },
1320         {
1321                 .procname       = "overcommit_ratio",
1322                 .data           = &sysctl_overcommit_ratio,
1323                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1324                 .mode           = 0644,
1325                 .proc_handler   = overcommit_ratio_handler,
1326         },
1327         {
1328                 .procname       = "overcommit_kbytes",
1329                 .data           = &sysctl_overcommit_kbytes,
1330                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1331                 .mode           = 0644,
1332                 .proc_handler   = overcommit_kbytes_handler,
1333         },
1334         {
1335                 .procname       = "page-cluster", 
1336                 .data           = &page_cluster,
1337                 .maxlen         = sizeof(int),
1338                 .mode           = 0644,
1339                 .proc_handler   = proc_dointvec_minmax,
1340                 .extra1         = &zero,
1341         },
1342         {
1343                 .procname       = "dirty_background_ratio",
1344                 .data           = &dirty_background_ratio,
1345                 .maxlen         = sizeof(dirty_background_ratio),
1346                 .mode           = 0644,
1347                 .proc_handler   = dirty_background_ratio_handler,
1348                 .extra1         = &zero,
1349                 .extra2         = &one_hundred,
1350         },
1351         {
1352                 .procname       = "dirty_background_bytes",
1353                 .data           = &dirty_background_bytes,
1354                 .maxlen         = sizeof(dirty_background_bytes),
1355                 .mode           = 0644,
1356                 .proc_handler   = dirty_background_bytes_handler,
1357                 .extra1         = &one_ul,
1358         },
1359         {
1360                 .procname       = "dirty_ratio",
1361                 .data           = &vm_dirty_ratio,
1362                 .maxlen         = sizeof(vm_dirty_ratio),
1363                 .mode           = 0644,
1364                 .proc_handler   = dirty_ratio_handler,
1365                 .extra1         = &zero,
1366                 .extra2         = &one_hundred,
1367         },
1368         {
1369                 .procname       = "dirty_bytes",
1370                 .data           = &vm_dirty_bytes,
1371                 .maxlen         = sizeof(vm_dirty_bytes),
1372                 .mode           = 0644,
1373                 .proc_handler   = dirty_bytes_handler,
1374                 .extra1         = &dirty_bytes_min,
1375         },
1376         {
1377                 .procname       = "dirty_writeback_centisecs",
1378                 .data           = &dirty_writeback_interval,
1379                 .maxlen         = sizeof(dirty_writeback_interval),
1380                 .mode           = 0644,
1381                 .proc_handler   = dirty_writeback_centisecs_handler,
1382         },
1383         {
1384                 .procname       = "dirty_expire_centisecs",
1385                 .data           = &dirty_expire_interval,
1386                 .maxlen         = sizeof(dirty_expire_interval),
1387                 .mode           = 0644,
1388                 .proc_handler   = proc_dointvec_minmax,
1389                 .extra1         = &zero,
1390         },
1391         {
1392                 .procname       = "dirtytime_expire_seconds",
1393                 .data           = &dirtytime_expire_interval,
1394                 .maxlen         = sizeof(dirtytime_expire_interval),
1395                 .mode           = 0644,
1396                 .proc_handler   = dirtytime_interval_handler,
1397                 .extra1         = &zero,
1398         },
1399         {
1400                 .procname       = "swappiness",
1401                 .data           = &vm_swappiness,
1402                 .maxlen         = sizeof(vm_swappiness),
1403                 .mode           = 0644,
1404                 .proc_handler   = proc_dointvec_minmax,
1405                 .extra1         = &zero,
1406                 .extra2         = &one_hundred,
1407         },
1408 #ifdef CONFIG_HUGETLB_PAGE
1409         {
1410                 .procname       = "nr_hugepages",
1411                 .data           = NULL,
1412                 .maxlen         = sizeof(unsigned long),
1413                 .mode           = 0644,
1414                 .proc_handler   = hugetlb_sysctl_handler,
1415         },
1416 #ifdef CONFIG_NUMA
1417         {
1418                 .procname       = "nr_hugepages_mempolicy",
1419                 .data           = NULL,
1420                 .maxlen         = sizeof(unsigned long),
1421                 .mode           = 0644,
1422                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1423         },
1424         {
1425                 .procname               = "numa_stat",
1426                 .data                   = &sysctl_vm_numa_stat,
1427                 .maxlen                 = sizeof(int),
1428                 .mode                   = 0644,
1429                 .proc_handler   = sysctl_vm_numa_stat_handler,
1430                 .extra1                 = &zero,
1431                 .extra2                 = &one,
1432         },
1433 #endif
1434          {
1435                 .procname       = "hugetlb_shm_group",
1436                 .data           = &sysctl_hugetlb_shm_group,
1437                 .maxlen         = sizeof(gid_t),
1438                 .mode           = 0644,
1439                 .proc_handler   = proc_dointvec,
1440          },
1441         {
1442                 .procname       = "nr_overcommit_hugepages",
1443                 .data           = NULL,
1444                 .maxlen         = sizeof(unsigned long),
1445                 .mode           = 0644,
1446                 .proc_handler   = hugetlb_overcommit_handler,
1447         },
1448 #endif
1449         {
1450                 .procname       = "lowmem_reserve_ratio",
1451                 .data           = &sysctl_lowmem_reserve_ratio,
1452                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1453                 .mode           = 0644,
1454                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1455         },
1456         {
1457                 .procname       = "drop_caches",
1458                 .data           = &sysctl_drop_caches,
1459                 .maxlen         = sizeof(int),
1460                 .mode           = 0644,
1461                 .proc_handler   = drop_caches_sysctl_handler,
1462                 .extra1         = &one,
1463                 .extra2         = &four,
1464         },
1465 #ifdef CONFIG_COMPACTION
1466         {
1467                 .procname       = "compact_memory",
1468                 .data           = &sysctl_compact_memory,
1469                 .maxlen         = sizeof(int),
1470                 .mode           = 0200,
1471                 .proc_handler   = sysctl_compaction_handler,
1472         },
1473         {
1474                 .procname       = "extfrag_threshold",
1475                 .data           = &sysctl_extfrag_threshold,
1476                 .maxlen         = sizeof(int),
1477                 .mode           = 0644,
1478                 .proc_handler   = proc_dointvec_minmax,
1479                 .extra1         = &min_extfrag_threshold,
1480                 .extra2         = &max_extfrag_threshold,
1481         },
1482         {
1483                 .procname       = "compact_unevictable_allowed",
1484                 .data           = &sysctl_compact_unevictable_allowed,
1485                 .maxlen         = sizeof(int),
1486                 .mode           = 0644,
1487                 .proc_handler   = proc_dointvec,
1488                 .extra1         = &zero,
1489                 .extra2         = &one,
1490         },
1491
1492 #endif /* CONFIG_COMPACTION */
1493         {
1494                 .procname       = "min_free_kbytes",
1495                 .data           = &min_free_kbytes,
1496                 .maxlen         = sizeof(min_free_kbytes),
1497                 .mode           = 0644,
1498                 .proc_handler   = min_free_kbytes_sysctl_handler,
1499                 .extra1         = &zero,
1500         },
1501         {
1502                 .procname       = "watermark_boost_factor",
1503                 .data           = &watermark_boost_factor,
1504                 .maxlen         = sizeof(watermark_boost_factor),
1505                 .mode           = 0644,
1506                 .proc_handler   = watermark_boost_factor_sysctl_handler,
1507                 .extra1         = &zero,
1508         },
1509         {
1510                 .procname       = "watermark_scale_factor",
1511                 .data           = &watermark_scale_factor,
1512                 .maxlen         = sizeof(watermark_scale_factor),
1513                 .mode           = 0644,
1514                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1515                 .extra1         = &one,
1516                 .extra2         = &one_thousand,
1517         },
1518         {
1519                 .procname       = "percpu_pagelist_fraction",
1520                 .data           = &percpu_pagelist_fraction,
1521                 .maxlen         = sizeof(percpu_pagelist_fraction),
1522                 .mode           = 0644,
1523                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1524                 .extra1         = &zero,
1525         },
1526 #ifdef CONFIG_MMU
1527         {
1528                 .procname       = "max_map_count",
1529                 .data           = &sysctl_max_map_count,
1530                 .maxlen         = sizeof(sysctl_max_map_count),
1531                 .mode           = 0644,
1532                 .proc_handler   = proc_dointvec_minmax,
1533                 .extra1         = &zero,
1534         },
1535 #else
1536         {
1537                 .procname       = "nr_trim_pages",
1538                 .data           = &sysctl_nr_trim_pages,
1539                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1540                 .mode           = 0644,
1541                 .proc_handler   = proc_dointvec_minmax,
1542                 .extra1         = &zero,
1543         },
1544 #endif
1545         {
1546                 .procname       = "laptop_mode",
1547                 .data           = &laptop_mode,
1548                 .maxlen         = sizeof(laptop_mode),
1549                 .mode           = 0644,
1550                 .proc_handler   = proc_dointvec_jiffies,
1551         },
1552         {
1553                 .procname       = "block_dump",
1554                 .data           = &block_dump,
1555                 .maxlen         = sizeof(block_dump),
1556                 .mode           = 0644,
1557                 .proc_handler   = proc_dointvec,
1558                 .extra1         = &zero,
1559         },
1560         {
1561                 .procname       = "vfs_cache_pressure",
1562                 .data           = &sysctl_vfs_cache_pressure,
1563                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1564                 .mode           = 0644,
1565                 .proc_handler   = proc_dointvec,
1566                 .extra1         = &zero,
1567         },
1568 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1569         {
1570                 .procname       = "legacy_va_layout",
1571                 .data           = &sysctl_legacy_va_layout,
1572                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1573                 .mode           = 0644,
1574                 .proc_handler   = proc_dointvec,
1575                 .extra1         = &zero,
1576         },
1577 #endif
1578 #ifdef CONFIG_NUMA
1579         {
1580                 .procname       = "zone_reclaim_mode",
1581                 .data           = &node_reclaim_mode,
1582                 .maxlen         = sizeof(node_reclaim_mode),
1583                 .mode           = 0644,
1584                 .proc_handler   = proc_dointvec,
1585                 .extra1         = &zero,
1586         },
1587         {
1588                 .procname       = "min_unmapped_ratio",
1589                 .data           = &sysctl_min_unmapped_ratio,
1590                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1591                 .mode           = 0644,
1592                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1593                 .extra1         = &zero,
1594                 .extra2         = &one_hundred,
1595         },
1596         {
1597                 .procname       = "min_slab_ratio",
1598                 .data           = &sysctl_min_slab_ratio,
1599                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1600                 .mode           = 0644,
1601                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1602                 .extra1         = &zero,
1603                 .extra2         = &one_hundred,
1604         },
1605 #endif
1606 #ifdef CONFIG_SMP
1607         {
1608                 .procname       = "stat_interval",
1609                 .data           = &sysctl_stat_interval,
1610                 .maxlen         = sizeof(sysctl_stat_interval),
1611                 .mode           = 0644,
1612                 .proc_handler   = proc_dointvec_jiffies,
1613         },
1614         {
1615                 .procname       = "stat_refresh",
1616                 .data           = NULL,
1617                 .maxlen         = 0,
1618                 .mode           = 0600,
1619                 .proc_handler   = vmstat_refresh,
1620         },
1621 #endif
1622 #ifdef CONFIG_MMU
1623         {
1624                 .procname       = "mmap_min_addr",
1625                 .data           = &dac_mmap_min_addr,
1626                 .maxlen         = sizeof(unsigned long),
1627                 .mode           = 0644,
1628                 .proc_handler   = mmap_min_addr_handler,
1629         },
1630 #endif
1631 #ifdef CONFIG_NUMA
1632         {
1633                 .procname       = "numa_zonelist_order",
1634                 .data           = &numa_zonelist_order,
1635                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1636                 .mode           = 0644,
1637                 .proc_handler   = numa_zonelist_order_handler,
1638         },
1639 #endif
1640 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1641    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1642         {
1643                 .procname       = "vdso_enabled",
1644 #ifdef CONFIG_X86_32
1645                 .data           = &vdso32_enabled,
1646                 .maxlen         = sizeof(vdso32_enabled),
1647 #else
1648                 .data           = &vdso_enabled,
1649                 .maxlen         = sizeof(vdso_enabled),
1650 #endif
1651                 .mode           = 0644,
1652                 .proc_handler   = proc_dointvec,
1653                 .extra1         = &zero,
1654         },
1655 #endif
1656 #ifdef CONFIG_HIGHMEM
1657         {
1658                 .procname       = "highmem_is_dirtyable",
1659                 .data           = &vm_highmem_is_dirtyable,
1660                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1661                 .mode           = 0644,
1662                 .proc_handler   = proc_dointvec_minmax,
1663                 .extra1         = &zero,
1664                 .extra2         = &one,
1665         },
1666 #endif
1667 #ifdef CONFIG_MEMORY_FAILURE
1668         {
1669                 .procname       = "memory_failure_early_kill",
1670                 .data           = &sysctl_memory_failure_early_kill,
1671                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1672                 .mode           = 0644,
1673                 .proc_handler   = proc_dointvec_minmax,
1674                 .extra1         = &zero,
1675                 .extra2         = &one,
1676         },
1677         {
1678                 .procname       = "memory_failure_recovery",
1679                 .data           = &sysctl_memory_failure_recovery,
1680                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1681                 .mode           = 0644,
1682                 .proc_handler   = proc_dointvec_minmax,
1683                 .extra1         = &zero,
1684                 .extra2         = &one,
1685         },
1686 #endif
1687         {
1688                 .procname       = "user_reserve_kbytes",
1689                 .data           = &sysctl_user_reserve_kbytes,
1690                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1691                 .mode           = 0644,
1692                 .proc_handler   = proc_doulongvec_minmax,
1693         },
1694         {
1695                 .procname       = "admin_reserve_kbytes",
1696                 .data           = &sysctl_admin_reserve_kbytes,
1697                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1698                 .mode           = 0644,
1699                 .proc_handler   = proc_doulongvec_minmax,
1700         },
1701 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1702         {
1703                 .procname       = "mmap_rnd_bits",
1704                 .data           = &mmap_rnd_bits,
1705                 .maxlen         = sizeof(mmap_rnd_bits),
1706                 .mode           = 0600,
1707                 .proc_handler   = proc_dointvec_minmax,
1708                 .extra1         = (void *)&mmap_rnd_bits_min,
1709                 .extra2         = (void *)&mmap_rnd_bits_max,
1710         },
1711 #endif
1712 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1713         {
1714                 .procname       = "mmap_rnd_compat_bits",
1715                 .data           = &mmap_rnd_compat_bits,
1716                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1717                 .mode           = 0600,
1718                 .proc_handler   = proc_dointvec_minmax,
1719                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1720                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1721         },
1722 #endif
1723         { }
1724 };
1725
1726 static struct ctl_table fs_table[] = {
1727         {
1728                 .procname       = "inode-nr",
1729                 .data           = &inodes_stat,
1730                 .maxlen         = 2*sizeof(long),
1731                 .mode           = 0444,
1732                 .proc_handler   = proc_nr_inodes,
1733         },
1734         {
1735                 .procname       = "inode-state",
1736                 .data           = &inodes_stat,
1737                 .maxlen         = 7*sizeof(long),
1738                 .mode           = 0444,
1739                 .proc_handler   = proc_nr_inodes,
1740         },
1741         {
1742                 .procname       = "file-nr",
1743                 .data           = &files_stat,
1744                 .maxlen         = sizeof(files_stat),
1745                 .mode           = 0444,
1746                 .proc_handler   = proc_nr_files,
1747         },
1748         {
1749                 .procname       = "file-max",
1750                 .data           = &files_stat.max_files,
1751                 .maxlen         = sizeof(files_stat.max_files),
1752                 .mode           = 0644,
1753                 .proc_handler   = proc_doulongvec_minmax,
1754                 .extra1         = &zero_ul,
1755                 .extra2         = &long_max,
1756         },
1757         {
1758                 .procname       = "nr_open",
1759                 .data           = &sysctl_nr_open,
1760                 .maxlen         = sizeof(unsigned int),
1761                 .mode           = 0644,
1762                 .proc_handler   = proc_dointvec_minmax,
1763                 .extra1         = &sysctl_nr_open_min,
1764                 .extra2         = &sysctl_nr_open_max,
1765         },
1766         {
1767                 .procname       = "dentry-state",
1768                 .data           = &dentry_stat,
1769                 .maxlen         = 6*sizeof(long),
1770                 .mode           = 0444,
1771                 .proc_handler   = proc_nr_dentry,
1772         },
1773         {
1774                 .procname       = "overflowuid",
1775                 .data           = &fs_overflowuid,
1776                 .maxlen         = sizeof(int),
1777                 .mode           = 0644,
1778                 .proc_handler   = proc_dointvec_minmax,
1779                 .extra1         = &minolduid,
1780                 .extra2         = &maxolduid,
1781         },
1782         {
1783                 .procname       = "overflowgid",
1784                 .data           = &fs_overflowgid,
1785                 .maxlen         = sizeof(int),
1786                 .mode           = 0644,
1787                 .proc_handler   = proc_dointvec_minmax,
1788                 .extra1         = &minolduid,
1789                 .extra2         = &maxolduid,
1790         },
1791 #ifdef CONFIG_FILE_LOCKING
1792         {
1793                 .procname       = "leases-enable",
1794                 .data           = &leases_enable,
1795                 .maxlen         = sizeof(int),
1796                 .mode           = 0644,
1797                 .proc_handler   = proc_dointvec,
1798         },
1799 #endif
1800 #ifdef CONFIG_DNOTIFY
1801         {
1802                 .procname       = "dir-notify-enable",
1803                 .data           = &dir_notify_enable,
1804                 .maxlen         = sizeof(int),
1805                 .mode           = 0644,
1806                 .proc_handler   = proc_dointvec,
1807         },
1808 #endif
1809 #ifdef CONFIG_MMU
1810 #ifdef CONFIG_FILE_LOCKING
1811         {
1812                 .procname       = "lease-break-time",
1813                 .data           = &lease_break_time,
1814                 .maxlen         = sizeof(int),
1815                 .mode           = 0644,
1816                 .proc_handler   = proc_dointvec,
1817         },
1818 #endif
1819 #ifdef CONFIG_AIO
1820         {
1821                 .procname       = "aio-nr",
1822                 .data           = &aio_nr,
1823                 .maxlen         = sizeof(aio_nr),
1824                 .mode           = 0444,
1825                 .proc_handler   = proc_doulongvec_minmax,
1826         },
1827         {
1828                 .procname       = "aio-max-nr",
1829                 .data           = &aio_max_nr,
1830                 .maxlen         = sizeof(aio_max_nr),
1831                 .mode           = 0644,
1832                 .proc_handler   = proc_doulongvec_minmax,
1833         },
1834 #endif /* CONFIG_AIO */
1835 #ifdef CONFIG_INOTIFY_USER
1836         {
1837                 .procname       = "inotify",
1838                 .mode           = 0555,
1839                 .child          = inotify_table,
1840         },
1841 #endif  
1842 #ifdef CONFIG_EPOLL
1843         {
1844                 .procname       = "epoll",
1845                 .mode           = 0555,
1846                 .child          = epoll_table,
1847         },
1848 #endif
1849 #endif
1850         {
1851                 .procname       = "protected_symlinks",
1852                 .data           = &sysctl_protected_symlinks,
1853                 .maxlen         = sizeof(int),
1854                 .mode           = 0600,
1855                 .proc_handler   = proc_dointvec_minmax,
1856                 .extra1         = &zero,
1857                 .extra2         = &one,
1858         },
1859         {
1860                 .procname       = "protected_hardlinks",
1861                 .data           = &sysctl_protected_hardlinks,
1862                 .maxlen         = sizeof(int),
1863                 .mode           = 0600,
1864                 .proc_handler   = proc_dointvec_minmax,
1865                 .extra1         = &zero,
1866                 .extra2         = &one,
1867         },
1868         {
1869                 .procname       = "protected_fifos",
1870                 .data           = &sysctl_protected_fifos,
1871                 .maxlen         = sizeof(int),
1872                 .mode           = 0600,
1873                 .proc_handler   = proc_dointvec_minmax,
1874                 .extra1         = &zero,
1875                 .extra2         = &two,
1876         },
1877         {
1878                 .procname       = "protected_regular",
1879                 .data           = &sysctl_protected_regular,
1880                 .maxlen         = sizeof(int),
1881                 .mode           = 0600,
1882                 .proc_handler   = proc_dointvec_minmax,
1883                 .extra1         = &zero,
1884                 .extra2         = &two,
1885         },
1886         {
1887                 .procname       = "suid_dumpable",
1888                 .data           = &suid_dumpable,
1889                 .maxlen         = sizeof(int),
1890                 .mode           = 0644,
1891                 .proc_handler   = proc_dointvec_minmax_coredump,
1892                 .extra1         = &zero,
1893                 .extra2         = &two,
1894         },
1895 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1896         {
1897                 .procname       = "binfmt_misc",
1898                 .mode           = 0555,
1899                 .child          = sysctl_mount_point,
1900         },
1901 #endif
1902         {
1903                 .procname       = "pipe-max-size",
1904                 .data           = &pipe_max_size,
1905                 .maxlen         = sizeof(pipe_max_size),
1906                 .mode           = 0644,
1907                 .proc_handler   = proc_dopipe_max_size,
1908         },
1909         {
1910                 .procname       = "pipe-user-pages-hard",
1911                 .data           = &pipe_user_pages_hard,
1912                 .maxlen         = sizeof(pipe_user_pages_hard),
1913                 .mode           = 0644,
1914                 .proc_handler   = proc_doulongvec_minmax,
1915         },
1916         {
1917                 .procname       = "pipe-user-pages-soft",
1918                 .data           = &pipe_user_pages_soft,
1919                 .maxlen         = sizeof(pipe_user_pages_soft),
1920                 .mode           = 0644,
1921                 .proc_handler   = proc_doulongvec_minmax,
1922         },
1923         {
1924                 .procname       = "mount-max",
1925                 .data           = &sysctl_mount_max,
1926                 .maxlen         = sizeof(unsigned int),
1927                 .mode           = 0644,
1928                 .proc_handler   = proc_dointvec_minmax,
1929                 .extra1         = &one,
1930         },
1931         { }
1932 };
1933
1934 static struct ctl_table debug_table[] = {
1935 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1936         {
1937                 .procname       = "exception-trace",
1938                 .data           = &show_unhandled_signals,
1939                 .maxlen         = sizeof(int),
1940                 .mode           = 0644,
1941                 .proc_handler   = proc_dointvec
1942         },
1943 #endif
1944 #if defined(CONFIG_OPTPROBES)
1945         {
1946                 .procname       = "kprobes-optimization",
1947                 .data           = &sysctl_kprobes_optimization,
1948                 .maxlen         = sizeof(int),
1949                 .mode           = 0644,
1950                 .proc_handler   = proc_kprobes_optimization_handler,
1951                 .extra1         = &zero,
1952                 .extra2         = &one,
1953         },
1954 #endif
1955         { }
1956 };
1957
1958 static struct ctl_table dev_table[] = {
1959         { }
1960 };
1961
1962 int __init sysctl_init(void)
1963 {
1964         struct ctl_table_header *hdr;
1965
1966         hdr = register_sysctl_table(sysctl_base_table);
1967         kmemleak_not_leak(hdr);
1968         return 0;
1969 }
1970
1971 #endif /* CONFIG_SYSCTL */
1972
1973 /*
1974  * /proc/sys support
1975  */
1976
1977 #ifdef CONFIG_PROC_SYSCTL
1978
1979 static int _proc_do_string(char *data, int maxlen, int write,
1980                            char __user *buffer,
1981                            size_t *lenp, loff_t *ppos)
1982 {
1983         size_t len;
1984         char __user *p;
1985         char c;
1986
1987         if (!data || !maxlen || !*lenp) {
1988                 *lenp = 0;
1989                 return 0;
1990         }
1991
1992         if (write) {
1993                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1994                         /* Only continue writes not past the end of buffer. */
1995                         len = strlen(data);
1996                         if (len > maxlen - 1)
1997                                 len = maxlen - 1;
1998
1999                         if (*ppos > len)
2000                                 return 0;
2001                         len = *ppos;
2002                 } else {
2003                         /* Start writing from beginning of buffer. */
2004                         len = 0;
2005                 }
2006
2007                 *ppos += *lenp;
2008                 p = buffer;
2009                 while ((p - buffer) < *lenp && len < maxlen - 1) {
2010                         if (get_user(c, p++))
2011                                 return -EFAULT;
2012                         if (c == 0 || c == '\n')
2013                                 break;
2014                         data[len++] = c;
2015                 }
2016                 data[len] = 0;
2017         } else {
2018                 len = strlen(data);
2019                 if (len > maxlen)
2020                         len = maxlen;
2021
2022                 if (*ppos > len) {
2023                         *lenp = 0;
2024                         return 0;
2025                 }
2026
2027                 data += *ppos;
2028                 len  -= *ppos;
2029
2030                 if (len > *lenp)
2031                         len = *lenp;
2032                 if (len)
2033                         if (copy_to_user(buffer, data, len))
2034                                 return -EFAULT;
2035                 if (len < *lenp) {
2036                         if (put_user('\n', buffer + len))
2037                                 return -EFAULT;
2038                         len++;
2039                 }
2040                 *lenp = len;
2041                 *ppos += len;
2042         }
2043         return 0;
2044 }
2045
2046 static void warn_sysctl_write(struct ctl_table *table)
2047 {
2048         pr_warn_once("%s wrote to %s when file position was not 0!\n"
2049                 "This will not be supported in the future. To silence this\n"
2050                 "warning, set kernel.sysctl_writes_strict = -1\n",
2051                 current->comm, table->procname);
2052 }
2053
2054 /**
2055  * proc_first_pos_non_zero_ignore - check if first position is allowed
2056  * @ppos: file position
2057  * @table: the sysctl table
2058  *
2059  * Returns true if the first position is non-zero and the sysctl_writes_strict
2060  * mode indicates this is not allowed for numeric input types. String proc
2061  * handlers can ignore the return value.
2062  */
2063 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2064                                            struct ctl_table *table)
2065 {
2066         if (!*ppos)
2067                 return false;
2068
2069         switch (sysctl_writes_strict) {
2070         case SYSCTL_WRITES_STRICT:
2071                 return true;
2072         case SYSCTL_WRITES_WARN:
2073                 warn_sysctl_write(table);
2074                 return false;
2075         default:
2076                 return false;
2077         }
2078 }
2079
2080 /**
2081  * proc_dostring - read a string sysctl
2082  * @table: the sysctl table
2083  * @write: %TRUE if this is a write to the sysctl file
2084  * @buffer: the user buffer
2085  * @lenp: the size of the user buffer
2086  * @ppos: file position
2087  *
2088  * Reads/writes a string from/to the user buffer. If the kernel
2089  * buffer provided is not large enough to hold the string, the
2090  * string is truncated. The copied string is %NULL-terminated.
2091  * If the string is being read by the user process, it is copied
2092  * and a newline '\n' is added. It is truncated if the buffer is
2093  * not large enough.
2094  *
2095  * Returns 0 on success.
2096  */
2097 int proc_dostring(struct ctl_table *table, int write,
2098                   void __user *buffer, size_t *lenp, loff_t *ppos)
2099 {
2100         if (write)
2101                 proc_first_pos_non_zero_ignore(ppos, table);
2102
2103         return _proc_do_string((char *)(table->data), table->maxlen, write,
2104                                (char __user *)buffer, lenp, ppos);
2105 }
2106
/*
 * Advance *buf past leading whitespace (as determined by skip_spaces())
 * and return how many characters were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);

	return *buf - start;
}
2115
/*
 * Skip a run of the character @v at the front of *buf, consuming at most
 * *size bytes.  *buf is advanced and *size reduced by the number of
 * characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	for (; *size && **buf == v; --*size)
		++*buf;
}
2125
2126 /**
2127  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
2128  *                   fail on overflow
2129  *
2130  * @cp: kernel buffer containing the string to parse
2131  * @endp: pointer to store the trailing characters
2132  * @base: the base to use
2133  * @res: where the parsed integer will be stored
2134  *
2135  * In case of success 0 is returned and @res will contain the parsed integer,
2136  * @endp will hold any trailing characters.
2137  * This function will fail the parse on overflow. If there wasn't an overflow
2138  * the function will defer the decision what characters count as invalid to the
2139  * caller.
2140  */
2141 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
2142                            unsigned long *res)
2143 {
2144         unsigned long long result;
2145         unsigned int rv;
2146
2147         cp = _parse_integer_fixup_radix(cp, &base);
2148         rv = _parse_integer(cp, base, &result);
2149         if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
2150                 return -ERANGE;
2151
2152         cp += rv;
2153
2154         if (endp)
2155                 *endp = (char *)cp;
2156
2157         *res = (unsigned long)result;
2158         return 0;
2159 }
2160
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. A leading '-' is
 * accepted only when more than one byte is available, and the character
 * after the optional sign must be a digit.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if @size is zero, the input does not start with a
 * number, the number overflows, the number fills the whole scratch
 * buffer, or it is followed by a character not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	/*
	 * Keep the length in size_t: assigning *size to an int would
	 * truncate large values (possibly to a negative number) and defeat
	 * the TMPBUFLEN clamp guarding the memcpy() below.
	 */
	size_t len;
	char *p, tmp[TMPBUFLEN];

	if (!*size)
		return -EINVAL;

	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else {
		*neg = false;
	}
	if (!isdigit(*p))
		return -EINVAL;

	if (strtoul_lenient(p, &p, 0, val))
		return -EINVAL;

	/* Number of bytes consumed, including the optional sign. */
	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
2226
2227 /**
2228  * proc_put_long - converts an integer to a decimal ASCII formatted string
2229  *
2230  * @buf: the user buffer
2231  * @size: the size of the user buffer
2232  * @val: the integer to be converted
2233  * @neg: sign of the number, %TRUE for negative
2234  *
2235  * In case of success %0 is returned and @buf and @size are updated with
2236  * the amount of bytes written.
2237  */
2238 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2239                           bool neg)
2240 {
2241         int len;
2242         char tmp[TMPBUFLEN], *p = tmp;
2243
2244         sprintf(p, "%s%lu", neg ? "-" : "", val);
2245         len = strlen(tmp);
2246         if (len > *size)
2247                 len = *size;
2248         if (copy_to_user(*buf, tmp, len))
2249                 return -EFAULT;
2250         *size -= len;
2251         *buf += len;
2252         return 0;
2253 }
2254 #undef TMPBUFLEN
2255
2256 static int proc_put_char(void __user **buf, size_t *size, char c)
2257 {
2258         if (*size) {
2259                 char __user **buffer = (char __user **)buf;
2260                 if (put_user(c, *buffer))
2261                         return -EFAULT;
2262                 (*size)--, (*buffer)++;
2263                 *buf = *buffer;
2264         }
2265         return 0;
2266 }
2267
/*
 * Default conversion between the sign/magnitude pair (*negp, *lvalp) used by
 * the parser and the int stored at *valp.
 *
 * Write (@write != 0): stores the signed value in *valp; returns -EINVAL if
 * the magnitude does not fit (more than INT_MAX for a positive number, more
 * than INT_MAX + 1 for a negative one).
 * Read (@write == 0): splits *valp into *negp and *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		int cur = *valp;

		if (cur < 0) {
			*negp = true;
			*lvalp = -(unsigned long)cur;
		} else {
			*negp = false;
			*lvalp = (unsigned long)cur;
		}
		return 0;
	}

	if (*negp) {
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		*valp = -*lvalp;
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	*valp = *lvalp;

	return 0;
}
2294
/*
 * Default conversion between the parsed unsigned long *lvalp and the
 * unsigned int stored at *valp.
 *
 * Write (@write != 0): stores *lvalp in *valp, or returns -EINVAL if it
 * exceeds UINT_MAX.
 * Read (@write == 0): widens *valp into *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2309
/*
 * Whitespace characters permitted directly after a parsed number.  This is a
 * plain character array without a terminating NUL; users pass it together
 * with sizeof(proc_wspace_sep) as the element count.
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
2311
2312 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2313                   int write, void __user *buffer,
2314                   size_t *lenp, loff_t *ppos,
2315                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2316                               int write, void *data),
2317                   void *data)
2318 {
2319         int *i, vleft, first = 1, err = 0;
2320         size_t left;
2321         char *kbuf = NULL, *p;
2322         
2323         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2324                 *lenp = 0;
2325                 return 0;
2326         }
2327         
2328         i = (int *) tbl_data;
2329         vleft = table->maxlen / sizeof(*i);
2330         left = *lenp;
2331
2332         if (!conv)
2333                 conv = do_proc_dointvec_conv;
2334
2335         if (write) {
2336                 if (proc_first_pos_non_zero_ignore(ppos, table))
2337                         goto out;
2338
2339                 if (left > PAGE_SIZE - 1)
2340                         left = PAGE_SIZE - 1;
2341                 p = kbuf = memdup_user_nul(buffer, left);
2342                 if (IS_ERR(kbuf))
2343                         return PTR_ERR(kbuf);
2344         }
2345
2346         for (; left && vleft--; i++, first=0) {
2347                 unsigned long lval;
2348                 bool neg;
2349
2350                 if (write) {
2351                         left -= proc_skip_spaces(&p);
2352
2353                         if (!left)
2354                                 break;
2355                         err = proc_get_long(&p, &left, &lval, &neg,
2356                                              proc_wspace_sep,
2357                                              sizeof(proc_wspace_sep), NULL);
2358                         if (err)
2359                                 break;
2360                         if (conv(&neg, &lval, i, 1, data)) {
2361                                 err = -EINVAL;
2362                                 break;
2363                         }
2364                 } else {
2365                         if (conv(&neg, &lval, i, 0, data)) {
2366                                 err = -EINVAL;
2367                                 break;
2368                         }
2369                         if (!first)
2370                                 err = proc_put_char(&buffer, &left, '\t');
2371                         if (err)
2372                                 break;
2373                         err = proc_put_long(&buffer, &left, lval, neg);
2374                         if (err)
2375                                 break;
2376                 }
2377         }
2378
2379         if (!write && !first && left && !err)
2380                 err = proc_put_char(&buffer, &left, '\n');
2381         if (write && !err && left)
2382                 left -= proc_skip_spaces(&p);
2383         if (write) {
2384                 kfree(kbuf);
2385                 if (first)
2386                         return err ? : -EINVAL;
2387         }
2388         *lenp -= left;
2389 out:
2390         *ppos += *lenp;
2391         return err;
2392 }
2393
2394 static int do_proc_dointvec(struct ctl_table *table, int write,
2395                   void __user *buffer, size_t *lenp, loff_t *ppos,
2396                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2397                               int write, void *data),
2398                   void *data)
2399 {
2400         return __do_proc_dointvec(table->data, table, write,
2401                         buffer, lenp, ppos, conv, data);
2402 }
2403
2404 static int do_proc_douintvec_w(unsigned int *tbl_data,
2405                                struct ctl_table *table,
2406                                void __user *buffer,
2407                                size_t *lenp, loff_t *ppos,
2408                                int (*conv)(unsigned long *lvalp,
2409                                            unsigned int *valp,
2410                                            int write, void *data),
2411                                void *data)
2412 {
2413         unsigned long lval;
2414         int err = 0;
2415         size_t left;
2416         bool neg;
2417         char *kbuf = NULL, *p;
2418
2419         left = *lenp;
2420
2421         if (proc_first_pos_non_zero_ignore(ppos, table))
2422                 goto bail_early;
2423
2424         if (left > PAGE_SIZE - 1)
2425                 left = PAGE_SIZE - 1;
2426
2427         p = kbuf = memdup_user_nul(buffer, left);
2428         if (IS_ERR(kbuf))
2429                 return -EINVAL;
2430
2431         left -= proc_skip_spaces(&p);
2432         if (!left) {
2433                 err = -EINVAL;
2434                 goto out_free;
2435         }
2436
2437         err = proc_get_long(&p, &left, &lval, &neg,
2438                              proc_wspace_sep,
2439                              sizeof(proc_wspace_sep), NULL);
2440         if (err || neg) {
2441                 err = -EINVAL;
2442                 goto out_free;
2443         }
2444
2445         if (conv(&lval, tbl_data, 1, data)) {
2446                 err = -EINVAL;
2447                 goto out_free;
2448         }
2449
2450         if (!err && left)
2451                 left -= proc_skip_spaces(&p);
2452
2453 out_free:
2454         kfree(kbuf);
2455         if (err)
2456                 return -EINVAL;
2457
2458         return 0;
2459
2460         /* This is in keeping with old __do_proc_dointvec() */
2461 bail_early:
2462         *ppos += *lenp;
2463         return err;
2464 }
2465
2466 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2467                                size_t *lenp, loff_t *ppos,
2468                                int (*conv)(unsigned long *lvalp,
2469                                            unsigned int *valp,
2470                                            int write, void *data),
2471                                void *data)
2472 {
2473         unsigned long lval;
2474         int err = 0;
2475         size_t left;
2476
2477         left = *lenp;
2478
2479         if (conv(&lval, tbl_data, 0, data)) {
2480                 err = -EINVAL;
2481                 goto out;
2482         }
2483
2484         err = proc_put_long(&buffer, &left, lval, false);
2485         if (err || !left)
2486                 goto out;
2487
2488         err = proc_put_char(&buffer, &left, '\n');
2489
2490 out:
2491         *lenp -= left;
2492         *ppos += *lenp;
2493
2494         return err;
2495 }
2496
2497 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2498                                int write, void __user *buffer,
2499                                size_t *lenp, loff_t *ppos,
2500                                int (*conv)(unsigned long *lvalp,
2501                                            unsigned int *valp,
2502                                            int write, void *data),
2503                                void *data)
2504 {
2505         unsigned int *i, vleft;
2506
2507         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2508                 *lenp = 0;
2509                 return 0;
2510         }
2511
2512         i = (unsigned int *) tbl_data;
2513         vleft = table->maxlen / sizeof(*i);
2514
2515         /*
2516          * Arrays are not supported, keep this simple. *Do not* add
2517          * support for them.
2518          */
2519         if (vleft != 1) {
2520                 *lenp = 0;
2521                 return -EINVAL;
2522         }
2523
2524         if (!conv)
2525                 conv = do_proc_douintvec_conv;
2526
2527         if (write)
2528                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2529                                            conv, data);
2530         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2531 }
2532
2533 static int do_proc_douintvec(struct ctl_table *table, int write,
2534                              void __user *buffer, size_t *lenp, loff_t *ppos,
2535                              int (*conv)(unsigned long *lvalp,
2536                                          unsigned int *valp,
2537                                          int write, void *data),
2538                              void *data)
2539 {
2540         return __do_proc_douintvec(table->data, table, write,
2541                                    buffer, lenp, ppos, conv, data);
2542 }
2543
2544 /**
2545  * proc_dointvec - read a vector of integers
2546  * @table: the sysctl table
2547  * @write: %TRUE if this is a write to the sysctl file
2548  * @buffer: the user buffer
2549  * @lenp: the size of the user buffer
2550  * @ppos: file position
2551  *
2552  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2553  * values from/to the user buffer, treated as an ASCII string. 
2554  *
2555  * Returns 0 on success.
2556  */
2557 int proc_dointvec(struct ctl_table *table, int write,
2558                      void __user *buffer, size_t *lenp, loff_t *ppos)
2559 {
2560         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2561 }
2562
2563 /**
2564  * proc_douintvec - read a vector of unsigned integers
2565  * @table: the sysctl table
2566  * @write: %TRUE if this is a write to the sysctl file
2567  * @buffer: the user buffer
2568  * @lenp: the size of the user buffer
2569  * @ppos: file position
2570  *
2571  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2572  * values from/to the user buffer, treated as an ASCII string.
2573  *
2574  * Returns 0 on success.
2575  */
2576 int proc_douintvec(struct ctl_table *table, int write,
2577                      void __user *buffer, size_t *lenp, loff_t *ppos)
2578 {
2579         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2580                                  do_proc_douintvec_conv, NULL);
2581 }
2582
2583 /*
2584  * Taint values can only be increased
2585  * This means we can safely use a temporary.
2586  */
2587 static int proc_taint(struct ctl_table *table, int write,
2588                                void __user *buffer, size_t *lenp, loff_t *ppos)
2589 {
2590         struct ctl_table t;
2591         unsigned long tmptaint = get_taint();
2592         int err;
2593
2594         if (write && !capable(CAP_SYS_ADMIN))
2595                 return -EPERM;
2596
2597         t = *table;
2598         t.data = &tmptaint;
2599         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2600         if (err < 0)
2601                 return err;
2602
2603         if (write) {
2604                 /*
2605                  * Poor man's atomic or. Not worth adding a primitive
2606                  * to everyone's atomic.h for this
2607                  */
2608                 int i;
2609                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2610                         if ((tmptaint >> i) & 1)
2611                                 add_taint(i, LOCKDEP_STILL_OK);
2612                 }
2613         }
2614
2615         return err;
2616 }
2617
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() variant whose write side is restricted to
 * callers holding CAP_SYS_ADMIN; other writers get -EPERM. Reads are not
 * subject to the capability check.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        const bool permitted = !write || capable(CAP_SYS_ADMIN);

        if (!permitted)
                return -EPERM;

        return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
2628
/**
 * struct do_proc_dointvec_minmax_conv_param - range limits for proc_dointvec_minmax()
 * @min: pointer to the smallest value accepted on write, or NULL for no lower bound
 * @max: pointer to the largest value accepted on write, or NULL for no upper bound
 *
 * An instance of this structure is passed as the conversion data to
 * do_proc_dointvec_minmax_conv(), which compares a value being written
 * against whichever of the two bounds is non-NULL.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};
2642
2643 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2644                                         int *valp,
2645                                         int write, void *data)
2646 {
2647         int tmp, ret;
2648         struct do_proc_dointvec_minmax_conv_param *param = data;
2649         /*
2650          * If writing, first do so via a temporary local int so we can
2651          * bounds-check it before touching *valp.
2652          */
2653         int *ip = write ? &tmp : valp;
2654
2655         ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
2656         if (ret)
2657                 return ret;
2658
2659         if (write) {
2660                 if ((param->min && *param->min > tmp) ||
2661                     (param->max && *param->max < tmp))
2662                         return -EINVAL;
2663                 *valp = tmp;
2664         }
2665
2666         return 0;
2667 }
2668
2669 /**
2670  * proc_dointvec_minmax - read a vector of integers with min/max values
2671  * @table: the sysctl table
2672  * @write: %TRUE if this is a write to the sysctl file
2673  * @buffer: the user buffer
2674  * @lenp: the size of the user buffer
2675  * @ppos: file position
2676  *
2677  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2678  * values from/to the user buffer, treated as an ASCII string.
2679  *
2680  * This routine will ensure the values are within the range specified by
2681  * table->extra1 (min) and table->extra2 (max).
2682  *
2683  * Returns 0 on success or -EINVAL on write when the range check fails.
2684  */
2685 int proc_dointvec_minmax(struct ctl_table *table, int write,
2686                   void __user *buffer, size_t *lenp, loff_t *ppos)
2687 {
2688         struct do_proc_dointvec_minmax_conv_param param = {
2689                 .min = (int *) table->extra1,
2690                 .max = (int *) table->extra2,
2691         };
2692         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2693                                 do_proc_dointvec_minmax_conv, &param);
2694 }
2695
/**
 * struct do_proc_douintvec_minmax_conv_param - range limits for proc_douintvec_minmax()
 * @min: pointer to the smallest value accepted on write, or NULL for no lower bound
 * @max: pointer to the largest value accepted on write, or NULL for no upper bound
 *
 * An instance of this structure is passed as the conversion data to
 * do_proc_douintvec_minmax_conv(), which compares a value being written
 * against whichever of the two bounds is non-NULL.
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
};
2709
2710 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
2711                                          unsigned int *valp,
2712                                          int write, void *data)
2713 {
2714         int ret;
2715         unsigned int tmp;
2716         struct do_proc_douintvec_minmax_conv_param *param = data;
2717         /* write via temporary local uint for bounds-checking */
2718         unsigned int *up = write ? &tmp : valp;
2719
2720         ret = do_proc_douintvec_conv(lvalp, up, write, data);
2721         if (ret)
2722                 return ret;
2723
2724         if (write) {
2725                 if ((param->min && *param->min > tmp) ||
2726                     (param->max && *param->max < tmp))
2727                         return -ERANGE;
2728
2729                 *valp = tmp;
2730         }
2731
2732         return 0;
2733 }
2734
2735 /**
2736  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2737  * @table: the sysctl table
2738  * @write: %TRUE if this is a write to the sysctl file
2739  * @buffer: the user buffer
2740  * @lenp: the size of the user buffer
2741  * @ppos: file position
2742  *
2743  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2744  * values from/to the user buffer, treated as an ASCII string. Negative
2745  * strings are not allowed.
2746  *
2747  * This routine will ensure the values are within the range specified by
2748  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2749  * check for UINT_MAX to avoid having to support wrap around uses from
2750  * userspace.
2751  *
2752  * Returns 0 on success or -ERANGE on write when the range check fails.
2753  */
2754 int proc_douintvec_minmax(struct ctl_table *table, int write,
2755                           void __user *buffer, size_t *lenp, loff_t *ppos)
2756 {
2757         struct do_proc_douintvec_minmax_conv_param param = {
2758                 .min = (unsigned int *) table->extra1,
2759                 .max = (unsigned int *) table->extra2,
2760         };
2761         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2762                                  do_proc_douintvec_minmax_conv, &param);
2763 }
2764
/*
 * Conversion callback for the pipe size limit.
 *
 * On read the stored unsigned int is widened into *lvalp. On write the
 * requested value is passed through round_pipe_size(); a result of 0 is
 * rejected with -EINVAL, anything else is stored in *valp.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
                                        unsigned int *valp,
                                        int write, void *data)
{
        unsigned int rounded;

        if (!write) {
                unsigned int current_val = *valp;

                *lvalp = (unsigned long) current_val;
                return 0;
        }

        rounded = round_pipe_size(*lvalp);
        if (rounded == 0)
                return -EINVAL;

        *valp = rounded;
        return 0;
}
2784
2785 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2786                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2787 {
2788         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2789                                  do_proc_dopipe_max_size_conv, NULL);
2790 }
2791
/*
 * Emit a warning when suid_dumpable is SUID_DUMP_ROOT while core_pattern
 * starts with neither '/' nor '|'. Compiles to an empty function without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;

        /* An absolute path or a pipe handler is considered safe. */
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
        );
#endif
}
2805
2806 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2807                 void __user *buffer, size_t *lenp, loff_t *ppos)
2808 {
2809         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2810         if (!error)
2811                 validate_coredump_safety();
2812         return error;
2813 }
2814
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, when it succeeds, by
 * validate_coredump_safety(). The handler's result is returned as-is.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int error;

        error = proc_dostring(table, write, buffer, lenp, ppos);
        if (error)
                return error;

        validate_coredump_safety();
        return 0;
}
#endif
2825
2826 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2827                                      void __user *buffer,
2828                                      size_t *lenp, loff_t *ppos,
2829                                      unsigned long convmul,
2830                                      unsigned long convdiv)
2831 {
2832         unsigned long *i, *min, *max;
2833         int vleft, first = 1, err = 0;
2834         size_t left;
2835         char *kbuf = NULL, *p;
2836
2837         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2838                 *lenp = 0;
2839                 return 0;
2840         }
2841
2842         i = (unsigned long *) data;
2843         min = (unsigned long *) table->extra1;
2844         max = (unsigned long *) table->extra2;
2845         vleft = table->maxlen / sizeof(unsigned long);
2846         left = *lenp;
2847
2848         if (write) {
2849                 if (proc_first_pos_non_zero_ignore(ppos, table))
2850                         goto out;
2851
2852                 if (left > PAGE_SIZE - 1)
2853                         left = PAGE_SIZE - 1;
2854                 p = kbuf = memdup_user_nul(buffer, left);
2855                 if (IS_ERR(kbuf))
2856                         return PTR_ERR(kbuf);
2857         }
2858
2859         for (; left && vleft--; i++, first = 0) {
2860                 unsigned long val;
2861
2862                 if (write) {
2863                         bool neg;
2864
2865                         left -= proc_skip_spaces(&p);
2866                         if (!left)
2867                                 break;
2868
2869                         err = proc_get_long(&p, &left, &val, &neg,
2870                                              proc_wspace_sep,
2871                                              sizeof(proc_wspace_sep), NULL);
2872                         if (err)
2873                                 break;
2874                         if (neg)
2875                                 continue;
2876                         val = convmul * val / convdiv;
2877                         if ((min && val < *min) || (max && val > *max))
2878                                 continue;
2879                         *i = val;
2880                 } else {
2881                         val = convdiv * (*i) / convmul;
2882                         if (!first) {
2883                                 err = proc_put_char(&buffer, &left, '\t');
2884                                 if (err)
2885                                         break;
2886                         }
2887                         err = proc_put_long(&buffer, &left, val, false);
2888                         if (err)
2889                                 break;
2890                 }
2891         }
2892
2893         if (!write && !first && left && !err)
2894                 err = proc_put_char(&buffer, &left, '\n');
2895         if (write && !err)
2896                 left -= proc_skip_spaces(&p);
2897         if (write) {
2898                 kfree(kbuf);
2899                 if (first)
2900                         return err ? : -EINVAL;
2901         }
2902         *lenp -= left;
2903 out:
2904         *ppos += *lenp;
2905         return err;
2906 }
2907
2908 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2909                                      void __user *buffer,
2910                                      size_t *lenp, loff_t *ppos,
2911                                      unsigned long convmul,
2912                                      unsigned long convdiv)
2913 {
2914         return __do_proc_doulongvec_minmax(table->data, table, write,
2915                         buffer, lenp, ppos, convmul, convdiv);
2916 }
2917
2918 /**
2919  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2920  * @table: the sysctl table
2921  * @write: %TRUE if this is a write to the sysctl file
2922  * @buffer: the user buffer
2923  * @lenp: the size of the user buffer
2924  * @ppos: file position
2925  *
2926  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2927  * values from/to the user buffer, treated as an ASCII string.
2928  *
2929  * This routine will ensure the values are within the range specified by
2930  * table->extra1 (min) and table->extra2 (max).
2931  *
2932  * Returns 0 on success.
2933  */
2934 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2935                            void __user *buffer, size_t *lenp, loff_t *ppos)
2936 {
2937     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2938 }
2939
2940 /**
2941  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2942  * @table: the sysctl table
2943  * @write: %TRUE if this is a write to the sysctl file
2944  * @buffer: the user buffer
2945  * @lenp: the size of the user buffer
2946  * @ppos: file position
2947  *
2948  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2949  * values from/to the user buffer, treated as an ASCII string. The values
2950  * are treated as milliseconds, and converted to jiffies when they are stored.
2951  *
2952  * This routine will ensure the values are within the range specified by
2953  * table->extra1 (min) and table->extra2 (max).
2954  *
2955  * Returns 0 on success.
2956  */
2957 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2958                                       void __user *buffer,
2959                                       size_t *lenp, loff_t *ppos)
2960 {
2961     return do_proc_doulongvec_minmax(table, write, buffer,
2962                                      lenp, ppos, HZ, 1000l);
2963 }
2964
2965
2966 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2967                                          int *valp,
2968                                          int write, void *data)
2969 {
2970         if (write) {
2971                 if (*lvalp > INT_MAX / HZ)
2972                         return 1;
2973                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2974         } else {
2975                 int val = *valp;
2976                 unsigned long lval;
2977                 if (val < 0) {
2978                         *negp = true;
2979                         lval = -(unsigned long)val;
2980                 } else {
2981                         *negp = false;
2982                         lval = (unsigned long)val;
2983                 }
2984                 *lvalp = lval / HZ;
2985         }
2986         return 0;
2987 }
2988
2989 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2990                                                 int *valp,
2991                                                 int write, void *data)
2992 {
2993         if (write) {
2994                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2995                         return 1;
2996                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2997         } else {
2998                 int val = *valp;
2999                 unsigned long lval;
3000                 if (val < 0) {
3001                         *negp = true;
3002                         lval = -(unsigned long)val;
3003                 } else {
3004                         *negp = false;
3005                         lval = (unsigned long)val;
3006                 }
3007                 *lvalp = jiffies_to_clock_t(lval);
3008         }
3009         return 0;
3010 }
3011
/*
 * Conversion callback between milliseconds (user side) and jiffies
 * (stored).
 *
 * Write: the signed input is formed in unsigned long arithmetic and must
 * fit in an unsigned int before it is handed to msecs_to_jiffies();
 * otherwise 1 is returned. A jiffies result above INT_MAX is likewise
 * rejected with 1.
 * Read: the stored value is split into sign (*negp) and magnitude, and
 * the magnitude is converted with jiffies_to_msecs().
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
        if (write) {
                unsigned long msecs = *negp ? -*lvalp : *lvalp;
                unsigned long jif;

                /*
                 * NOTE(review): msecs_to_jiffies() is declared with an
                 * unsigned int parameter (include/linux/jiffies.h) --
                 * confirm. Without this check a value above UINT_MAX
                 * would be truncated on 64-bit and a small, wrong
                 * timeout stored instead of the write being refused.
                 */
                if (msecs > UINT_MAX)
                        return 1;

                jif = msecs_to_jiffies(msecs);
                if (jif > INT_MAX)
                        return 1;
                *valp = (int)jif;
        } else {
                int val = *valp;
                unsigned long lval;
                if (val < 0) {
                        *negp = true;
                        lval = -(unsigned long)val;
                } else {
                        *negp = false;
                        lval = (unsigned long)val;
                }
                *lvalp = jiffies_to_msecs(lval);
        }
        return 0;
}
3036
3037 /**
3038  * proc_dointvec_jiffies - read a vector of integers as seconds
3039  * @table: the sysctl table
3040  * @write: %TRUE if this is a write to the sysctl file
3041  * @buffer: the user buffer
3042  * @lenp: the size of the user buffer
3043  * @ppos: file position
3044  *
3045  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3046  * values from/to the user buffer, treated as an ASCII string. 
3047  * The values read are assumed to be in seconds, and are converted into
3048  * jiffies.
3049  *
3050  * Returns 0 on success.
3051  */
3052 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3053                           void __user *buffer, size_t *lenp, loff_t *ppos)
3054 {
3055     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3056                             do_proc_dointvec_jiffies_conv,NULL);
3057 }
3058
3059 /**
3060  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3061  * @table: the sysctl table
3062  * @write: %TRUE if this is a write to the sysctl file
3063  * @buffer: the user buffer
3064  * @lenp: the size of the user buffer
3065  * @ppos: pointer to the file position
3066  *
3067  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3068  * values from/to the user buffer, treated as an ASCII string. 
3069  * The values read are assumed to be in 1/USER_HZ seconds, and 
3070  * are converted into jiffies.
3071  *
3072  * Returns 0 on success.
3073  */
3074 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3075                                  void __user *buffer, size_t *lenp, loff_t *ppos)
3076 {
3077     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3078                             do_proc_dointvec_userhz_jiffies_conv,NULL);
3079 }
3080
3081 /**
3082  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3083  * @table: the sysctl table
3084  * @write: %TRUE if this is a write to the sysctl file
3085  * @buffer: the user buffer
3086  * @lenp: the size of the user buffer
3087  * @ppos: file position
3088  * @ppos: the current position in the file
3089  *
3090  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3091  * values from/to the user buffer, treated as an ASCII string. 
3092  * The values read are assumed to be in 1/1000 seconds, and 
3093  * are converted into jiffies.
3094  *
3095  * Returns 0 on success.
3096  */
3097 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3098                              void __user *buffer, size_t *lenp, loff_t *ppos)
3099 {
3100         return do_proc_dointvec(table, write, buffer, lenp, ppos,
3101                                 do_proc_dointvec_ms_jiffies_conv, NULL);
3102 }
3103
3104 static int proc_do_cad_pid(struct ctl_table *table, int write,
3105                            void __user *buffer, size_t *lenp, loff_t *ppos)
3106 {
3107         struct pid *new_pid;
3108         pid_t tmp;
3109         int r;
3110
3111         tmp = pid_vnr(cad_pid);
3112
3113         r = __do_proc_dointvec(&tmp, table, write, buffer,
3114                                lenp, ppos, NULL, NULL);
3115         if (r || !write)
3116                 return r;
3117
3118         new_pid = find_get_pid(tmp);
3119         if (!new_pid)
3120                 return -ESRCH;
3121
3122         put_pid(xchg(&cad_pid, new_pid));
3123         return 0;
3124 }
3125
3126 /**
3127  * proc_do_large_bitmap - read/write from/to a large bitmap
3128  * @table: the sysctl table
3129  * @write: %TRUE if this is a write to the sysctl file
3130  * @buffer: the user buffer
3131  * @lenp: the size of the user buffer
3132  * @ppos: file position
3133  *
3134  * The bitmap is stored at table->data and the bitmap length (in bits)
3135  * in table->maxlen.
3136  *
3137  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3138  * large bitmaps may be represented in a compact manner. Writing into
3139  * the file will clear the bitmap then update it with the given input.
3140  *
3141  * Returns 0 on success.
3142  */
3143 int proc_do_large_bitmap(struct ctl_table *table, int write,
3144                          void __user *buffer, size_t *lenp, loff_t *ppos)
3145 {
3146         int err = 0;
3147         bool first = 1;
3148         size_t left = *lenp;
3149         unsigned long bitmap_len = table->maxlen;
3150         unsigned long *bitmap = *(unsigned long **) table->data;
3151         unsigned long *tmp_bitmap = NULL;
3152         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3153
3154         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3155                 *lenp = 0;
3156                 return 0;
3157         }
3158
3159         if (write) {
3160                 char *kbuf, *p;
3161
3162                 if (left > PAGE_SIZE - 1)
3163                         left = PAGE_SIZE - 1;
3164
3165                 p = kbuf = memdup_user_nul(buffer, left);
3166                 if (IS_ERR(kbuf))
3167                         return PTR_ERR(kbuf);
3168
3169                 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3170                                      sizeof(unsigned long),
3171                                      GFP_KERNEL);
3172                 if (!tmp_bitmap) {
3173                         kfree(kbuf);
3174                         return -ENOMEM;
3175                 }
3176                 proc_skip_char(&p, &left, '\n');
3177                 while (!err && left) {
3178                         unsigned long val_a, val_b;
3179                         bool neg;
3180
3181                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3182                                              sizeof(tr_a), &c);
3183                         if (err)
3184                                 break;
3185                         if (val_a >= bitmap_len || neg) {
3186                                 err = -EINVAL;
3187                                 break;
3188                         }
3189
3190                         val_b = val_a;
3191                         if (left) {
3192                                 p++;
3193                                 left--;
3194                         }
3195
3196                         if (c == '-') {
3197                                 err = proc_get_long(&p, &left, &val_b,
3198                                                      &neg, tr_b, sizeof(tr_b),
3199                                                      &c);
3200                                 if (err)
3201                                         break;
3202                                 if (val_b >= bitmap_len || neg ||
3203                                     val_a > val_b) {
3204                                         err = -EINVAL;
3205                                         break;
3206                                 }
3207                                 if (left) {
3208                                         p++;
3209                                         left--;
3210                                 }
3211                         }
3212
3213                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3214                         first = 0;
3215                         proc_skip_char(&p, &left, '\n');
3216                 }
3217                 kfree(kbuf);
3218         } else {
3219                 unsigned long bit_a, bit_b = 0;
3220
3221                 while (left) {
3222                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3223                         if (bit_a >= bitmap_len)
3224                                 break;
3225                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3226                                                    bit_a + 1) - 1;
3227
3228                         if (!first) {
3229                                 err = proc_put_char(&buffer, &left, ',');
3230                                 if (err)
3231                                         break;
3232                         }
3233                         err = proc_put_long(&buffer, &left, bit_a, false);
3234                         if (err)
3235                                 break;
3236                         if (bit_a != bit_b) {
3237                                 err = proc_put_char(&buffer, &left, '-');
3238                                 if (err)
3239                                         break;
3240                                 err = proc_put_long(&buffer, &left, bit_b, false);
3241                                 if (err)
3242                                         break;
3243                         }
3244
3245                         first = 0; bit_b++;
3246                 }
3247                 if (!err)
3248                         err = proc_put_char(&buffer, &left, '\n');
3249         }
3250
3251         if (!err) {
3252                 if (write) {
3253                         if (*ppos)
3254                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3255                         else
3256                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3257                 }
3258                 *lenp -= left;
3259                 *ppos += *lenp;
3260         }
3261
3262         kfree(tmp_bitmap);
3263         return err;
3264 }
3265
3266 #else /* CONFIG_PROC_SYSCTL */
3267
3268 int proc_dostring(struct ctl_table *table, int write,
3269                   void __user *buffer, size_t *lenp, loff_t *ppos)
3270 {
3271         return -ENOSYS;
3272 }
3273
3274 int proc_dointvec(struct ctl_table *table, int write,
3275                   void __user *buffer, size_t *lenp, loff_t *ppos)
3276 {
3277         return -ENOSYS;
3278 }
3279
3280 int proc_douintvec(struct ctl_table *table, int write,
3281                   void __user *buffer, size_t *lenp, loff_t *ppos)
3282 {
3283         return -ENOSYS;
3284 }
3285
3286 int proc_dointvec_minmax(struct ctl_table *table, int write,
3287                     void __user *buffer, size_t *lenp, loff_t *ppos)
3288 {
3289         return -ENOSYS;
3290 }
3291
3292 int proc_douintvec_minmax(struct ctl_table *table, int write,
3293                           void __user *buffer, size_t *lenp, loff_t *ppos)
3294 {
3295         return -ENOSYS;
3296 }
3297
3298 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3299                     void __user *buffer, size_t *lenp, loff_t *ppos)
3300 {
3301         return -ENOSYS;
3302 }
3303
3304 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3305                     void __user *buffer, size_t *lenp, loff_t *ppos)
3306 {
3307         return -ENOSYS;
3308 }
3309
3310 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3311                              void __user *buffer, size_t *lenp, loff_t *ppos)
3312 {
3313         return -ENOSYS;
3314 }
3315
3316 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3317                     void __user *buffer, size_t *lenp, loff_t *ppos)
3318 {
3319         return -ENOSYS;
3320 }
3321
3322 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3323                                       void __user *buffer,
3324                                       size_t *lenp, loff_t *ppos)
3325 {
3326     return -ENOSYS;
3327 }
3328
3329 int proc_do_large_bitmap(struct ctl_table *table, int write,
3330                          void __user *buffer, size_t *lenp, loff_t *ppos)
3331 {
3332         return -ENOSYS;
3333 }
3334
3335 #endif /* CONFIG_PROC_SYSCTL */
3336
3337 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
3338 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
3339                                           void __user *buffer, size_t *lenp,
3340                                           loff_t *ppos)
3341 {
3342         int ret, bpf_stats = *(int *)table->data;
3343         struct ctl_table tmp = *table;
3344
3345         if (write && !capable(CAP_SYS_ADMIN))
3346                 return -EPERM;
3347
3348         tmp.data = &bpf_stats;
3349         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3350         if (write && !ret) {
3351                 *(int *)table->data = bpf_stats;
3352                 if (bpf_stats)
3353                         static_branch_enable(&bpf_stats_enabled_key);
3354                 else
3355                         static_branch_disable(&bpf_stats_enabled_key);
3356         }
3357         return ret;
3358 }
3359 #endif
/*
 * The handlers above have one definition per branch of the
 * CONFIG_PROC_SYSCTL conditional, so an export placed next to each
 * definition would have to be written twice.  As an exception to that
 * convention, every export is listed once here instead.
 */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(proc_do_large_bitmap);