Merge tag 'armsoc-late' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
[sfrench/cifs-2.6.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_BSD_PROCESS_ACCT
82 #include <linux/acct.h>
83 #endif
84 #ifdef CONFIG_RT_MUTEXES
85 #include <linux/rtmutex.h>
86 #endif
87 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
88 #include <linux/lockdep.h>
89 #endif
90 #ifdef CONFIG_CHR_DEV_SG
91 #include <scsi/sg.h>
92 #endif
93 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
94 #include <linux/stackleak.h>
95 #endif
96 #ifdef CONFIG_LOCKUP_DETECTOR
97 #include <linux/nmi.h>
98 #endif
99
100 #if defined(CONFIG_SYSCTL)
101
102 /* External variables not in a header file. */
103 extern int suid_dumpable;
104 #ifdef CONFIG_COREDUMP
105 extern int core_uses_pid;
106 extern char core_pattern[];
107 extern unsigned int core_pipe_limit;
108 #endif
109 extern int pid_max;
110 extern int pid_max_min, pid_max_max;
111 extern int percpu_pagelist_fraction;
112 extern int latencytop_enabled;
113 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
114 #ifndef CONFIG_MMU
115 extern int sysctl_nr_trim_pages;
116 #endif
117
118 /* Constants used for minimum and maximum (referenced via .extra1/.extra2 of table entries) */
119 #ifdef CONFIG_LOCKUP_DETECTOR
120 static int sixty = 60;
121 #endif
122
123 static int __maybe_unused neg_one = -1; /* e.g. lower bound of sysctl_writes_strict */
124
125 static int zero; /* no initializer: static storage, so the value is 0 */
126 static int __maybe_unused one = 1;
127 static int __maybe_unused two = 2;
128 static int __maybe_unused four = 4;
129 static unsigned long one_ul = 1;
130 static int one_hundred = 100;
131 static int one_thousand = 1000;
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000; /* e.g. upper bound of printk_delay */
134 #endif
135 #ifdef CONFIG_PERF_EVENTS
136 static int six_hundred_forty_kb = 640 * 1024;
137 #endif
138
139 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
140 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
141
142 /* these are needed for the proc_dointvec_minmax of the [fs_]overflow UID and GID entries */
143 static int maxolduid = 65535;
144 static int minolduid; /* no initializer: static storage, so the value is 0 */
145
146 static int ngroups_max = NGROUPS_MAX;
147 static const int cap_last_cap = CAP_LAST_CAP;
148
149 /*
150  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
151  * and hung_task_check_interval_secs
152  */
153 #ifdef CONFIG_DETECT_HUNG_TASK
154 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
155 #endif
156
157 #ifdef CONFIG_INOTIFY_USER
158 #include <linux/inotify.h>
159 #endif
160 #ifdef CONFIG_SPARC
161 #endif
162
163 #ifdef __hppa__
164 extern int pwrsw_enabled;
165 #endif
166
167 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
168 extern int unaligned_enabled;
169 #endif
170
171 #ifdef CONFIG_IA64
172 extern int unaligned_dump_stack;
173 #endif
174
175 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
176 extern int no_unaligned_warning;
177 #endif
178
179 #ifdef CONFIG_PROC_SYSCTL
180
181 /**
182  * enum sysctl_writes_mode - supported sysctl write modes
183  *
184  * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
185  *      to be written, and multiple writes on the same sysctl file descriptor
186  *      will rewrite the sysctl value, regardless of file position. No warning
187  *      is issued when the initial position is not 0.
188  * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
189  *      not 0.
190  * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
191  *      file position 0 and the value must be fully contained in the buffer
192  *      sent to the write syscall. If dealing with strings, respect the file
193  *      position, but restrict this to the max length of the buffer; anything
194  *      past the max length will be ignored. Multiple writes will append
195  *      to the buffer.
196  *
197  * These write modes control how current file position affects the behavior of
198  * updating sysctl values through the proc interface on each write.
199  */
200 enum sysctl_writes_mode {
201         SYSCTL_WRITES_LEGACY            = -1,
202         SYSCTL_WRITES_WARN              = 0,
203         SYSCTL_WRITES_STRICT            = 1,
204 };
205
206 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
207
208 static int proc_do_cad_pid(struct ctl_table *table, int write,
209                   void __user *buffer, size_t *lenp, loff_t *ppos);
210 static int proc_taint(struct ctl_table *table, int write,
211                                void __user *buffer, size_t *lenp, loff_t *ppos);
212 #endif
213
214 #ifdef CONFIG_PRINTK
215 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
216                                 void __user *buffer, size_t *lenp, loff_t *ppos);
217 #endif
218
219 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
220                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #ifdef CONFIG_COREDUMP
222 static int proc_dostring_coredump(struct ctl_table *table, int write,
223                 void __user *buffer, size_t *lenp, loff_t *ppos);
224 #endif
225 static int proc_dopipe_max_size(struct ctl_table *table, int write,
226                 void __user *buffer, size_t *lenp, loff_t *ppos);
227 #ifdef CONFIG_BPF_SYSCALL
228 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
229                                           void __user *buffer, size_t *lenp,
230                                           loff_t *ppos);
231 #endif
232
233 #ifdef CONFIG_MAGIC_SYSRQ
234 /* Note: sysrq code uses its own private copy */
235 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
236
237 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
238                                 void __user *buffer, size_t *lenp,
239                                 loff_t *ppos)
240 {
241         int error;
242
243         error = proc_dointvec(table, write, buffer, lenp, ppos);
244         if (error)
245                 return error;
246
247         if (write)
248                 sysrq_toggle_support(__sysrq_enabled);
249
250         return 0;
251 }
252
253 #endif
254
255 static struct ctl_table kern_table[];
256 static struct ctl_table vm_table[];
257 static struct ctl_table fs_table[];
258 static struct ctl_table debug_table[];
259 static struct ctl_table dev_table[];
260 extern struct ctl_table random_table[];
261 #ifdef CONFIG_EPOLL
262 extern struct ctl_table epoll_table[];
263 #endif
264
265 #ifdef CONFIG_FW_LOADER_USER_HELPER
266 extern struct ctl_table firmware_config_table[];
267 #endif
268
269 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
270 int sysctl_legacy_va_layout;
271 #endif
272
273 /* The default sysctl tables: top-level entries, each with mode 0555 and a pointer to its child table. */
274
275 static struct ctl_table sysctl_base_table[] = {
276         {
277                 .procname       = "kernel",
278                 .mode           = 0555,
279                 .child          = kern_table,
280         },
281         {
282                 .procname       = "vm",
283                 .mode           = 0555,
284                 .child          = vm_table,
285         },
286         {
287                 .procname       = "fs",
288                 .mode           = 0555,
289                 .child          = fs_table,
290         },
291         {
292                 .procname       = "debug",
293                 .mode           = 0555,
294                 .child          = debug_table,
295         },
296         {
297                 .procname       = "dev",
298                 .mode           = 0555,
299                 .child          = dev_table,
300         },
301         { } /* empty (all-zero) entry closes the list */
302 };
303
304 #ifdef CONFIG_SCHED_DEBUG
305 static int min_sched_granularity_ns = 100000;           /* 100 usecs; .extra1 of sched_min_granularity_ns and sched_latency_ns */
306 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second; .extra2 of sched_min_granularity_ns and sched_latency_ns */
307 static int min_wakeup_granularity_ns;                   /* 0 usecs; .extra1 of sched_wakeup_granularity_ns */
308 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second; .extra2 of sched_wakeup_granularity_ns */
309 #ifdef CONFIG_SMP
310 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; /* .extra1 of sched_tunable_scaling */
311 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1; /* .extra2 of sched_tunable_scaling */
312 #endif /* CONFIG_SMP */
313 #endif /* CONFIG_SCHED_DEBUG */
314
315 #ifdef CONFIG_COMPACTION
316 static int min_extfrag_threshold; /* no initializer: static storage, so the value is 0 */
317 static int max_extfrag_threshold = 1000;
318 #endif
319
320 static struct ctl_table kern_table[] = {
321         {
322                 .procname       = "sched_child_runs_first",
323                 .data           = &sysctl_sched_child_runs_first,
324                 .maxlen         = sizeof(unsigned int),
325                 .mode           = 0644,
326                 .proc_handler   = proc_dointvec,
327         },
328 #ifdef CONFIG_SCHED_DEBUG
329         {
330                 .procname       = "sched_min_granularity_ns",
331                 .data           = &sysctl_sched_min_granularity,
332                 .maxlen         = sizeof(unsigned int),
333                 .mode           = 0644,
334                 .proc_handler   = sched_proc_update_handler,
335                 .extra1         = &min_sched_granularity_ns,
336                 .extra2         = &max_sched_granularity_ns,
337         },
338         {
339                 .procname       = "sched_latency_ns",
340                 .data           = &sysctl_sched_latency,
341                 .maxlen         = sizeof(unsigned int),
342                 .mode           = 0644,
343                 .proc_handler   = sched_proc_update_handler,
344                 .extra1         = &min_sched_granularity_ns,
345                 .extra2         = &max_sched_granularity_ns,
346         },
347         {
348                 .procname       = "sched_wakeup_granularity_ns",
349                 .data           = &sysctl_sched_wakeup_granularity,
350                 .maxlen         = sizeof(unsigned int),
351                 .mode           = 0644,
352                 .proc_handler   = sched_proc_update_handler,
353                 .extra1         = &min_wakeup_granularity_ns,
354                 .extra2         = &max_wakeup_granularity_ns,
355         },
356 #ifdef CONFIG_SMP
357         {
358                 .procname       = "sched_tunable_scaling",
359                 .data           = &sysctl_sched_tunable_scaling,
360                 .maxlen         = sizeof(enum sched_tunable_scaling),
361                 .mode           = 0644,
362                 .proc_handler   = sched_proc_update_handler,
363                 .extra1         = &min_sched_tunable_scaling,
364                 .extra2         = &max_sched_tunable_scaling,
365         },
366         {
367                 .procname       = "sched_migration_cost_ns",
368                 .data           = &sysctl_sched_migration_cost,
369                 .maxlen         = sizeof(unsigned int),
370                 .mode           = 0644,
371                 .proc_handler   = proc_dointvec,
372         },
373         {
374                 .procname       = "sched_nr_migrate",
375                 .data           = &sysctl_sched_nr_migrate,
376                 .maxlen         = sizeof(unsigned int),
377                 .mode           = 0644,
378                 .proc_handler   = proc_dointvec,
379         },
380 #ifdef CONFIG_SCHEDSTATS
381         {
382                 .procname       = "sched_schedstats",
383                 .data           = NULL,
384                 .maxlen         = sizeof(unsigned int),
385                 .mode           = 0644,
386                 .proc_handler   = sysctl_schedstats,
387                 .extra1         = &zero,
388                 .extra2         = &one,
389         },
390 #endif /* CONFIG_SCHEDSTATS */
391 #endif /* CONFIG_SMP */
392 #ifdef CONFIG_NUMA_BALANCING
393         {
394                 .procname       = "numa_balancing_scan_delay_ms",
395                 .data           = &sysctl_numa_balancing_scan_delay,
396                 .maxlen         = sizeof(unsigned int),
397                 .mode           = 0644,
398                 .proc_handler   = proc_dointvec,
399         },
400         {
401                 .procname       = "numa_balancing_scan_period_min_ms",
402                 .data           = &sysctl_numa_balancing_scan_period_min,
403                 .maxlen         = sizeof(unsigned int),
404                 .mode           = 0644,
405                 .proc_handler   = proc_dointvec,
406         },
407         {
408                 .procname       = "numa_balancing_scan_period_max_ms",
409                 .data           = &sysctl_numa_balancing_scan_period_max,
410                 .maxlen         = sizeof(unsigned int),
411                 .mode           = 0644,
412                 .proc_handler   = proc_dointvec,
413         },
414         {
415                 .procname       = "numa_balancing_scan_size_mb",
416                 .data           = &sysctl_numa_balancing_scan_size,
417                 .maxlen         = sizeof(unsigned int),
418                 .mode           = 0644,
419                 .proc_handler   = proc_dointvec_minmax,
420                 .extra1         = &one,
421         },
422         {
423                 .procname       = "numa_balancing",
424                 .data           = NULL, /* filled in by handler */
425                 .maxlen         = sizeof(unsigned int),
426                 .mode           = 0644,
427                 .proc_handler   = sysctl_numa_balancing,
428                 .extra1         = &zero,
429                 .extra2         = &one,
430         },
431 #endif /* CONFIG_NUMA_BALANCING */
432 #endif /* CONFIG_SCHED_DEBUG */
433         {
434                 .procname       = "sched_rt_period_us",
435                 .data           = &sysctl_sched_rt_period,
436                 .maxlen         = sizeof(unsigned int),
437                 .mode           = 0644,
438                 .proc_handler   = sched_rt_handler,
439         },
440         {
441                 .procname       = "sched_rt_runtime_us",
442                 .data           = &sysctl_sched_rt_runtime,
443                 .maxlen         = sizeof(int),
444                 .mode           = 0644,
445                 .proc_handler   = sched_rt_handler,
446         },
447         {
448                 .procname       = "sched_rr_timeslice_ms",
449                 .data           = &sysctl_sched_rr_timeslice,
450                 .maxlen         = sizeof(int),
451                 .mode           = 0644,
452                 .proc_handler   = sched_rr_handler,
453         },
454 #ifdef CONFIG_SCHED_AUTOGROUP
455         {
456                 .procname       = "sched_autogroup_enabled",
457                 .data           = &sysctl_sched_autogroup_enabled,
458                 .maxlen         = sizeof(unsigned int),
459                 .mode           = 0644,
460                 .proc_handler   = proc_dointvec_minmax,
461                 .extra1         = &zero,
462                 .extra2         = &one,
463         },
464 #endif
465 #ifdef CONFIG_CFS_BANDWIDTH
466         {
467                 .procname       = "sched_cfs_bandwidth_slice_us",
468                 .data           = &sysctl_sched_cfs_bandwidth_slice,
469                 .maxlen         = sizeof(unsigned int),
470                 .mode           = 0644,
471                 .proc_handler   = proc_dointvec_minmax,
472                 .extra1         = &one,
473         },
474 #endif
475 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
476         {
477                 .procname       = "sched_energy_aware",
478                 .data           = &sysctl_sched_energy_aware,
479                 .maxlen         = sizeof(unsigned int),
480                 .mode           = 0644,
481                 .proc_handler   = sched_energy_aware_handler,
482                 .extra1         = &zero,
483                 .extra2         = &one,
484         },
485 #endif
486 #ifdef CONFIG_PROVE_LOCKING
487         {
488                 .procname       = "prove_locking",
489                 .data           = &prove_locking,
490                 .maxlen         = sizeof(int),
491                 .mode           = 0644,
492                 .proc_handler   = proc_dointvec,
493         },
494 #endif
495 #ifdef CONFIG_LOCK_STAT
496         {
497                 .procname       = "lock_stat",
498                 .data           = &lock_stat,
499                 .maxlen         = sizeof(int),
500                 .mode           = 0644,
501                 .proc_handler   = proc_dointvec,
502         },
503 #endif
504         {
505                 .procname       = "panic",
506                 .data           = &panic_timeout,
507                 .maxlen         = sizeof(int),
508                 .mode           = 0644,
509                 .proc_handler   = proc_dointvec,
510         },
511 #ifdef CONFIG_COREDUMP
512         {
513                 .procname       = "core_uses_pid",
514                 .data           = &core_uses_pid,
515                 .maxlen         = sizeof(int),
516                 .mode           = 0644,
517                 .proc_handler   = proc_dointvec,
518         },
519         {
520                 .procname       = "core_pattern",
521                 .data           = core_pattern,
522                 .maxlen         = CORENAME_MAX_SIZE,
523                 .mode           = 0644,
524                 .proc_handler   = proc_dostring_coredump,
525         },
526         {
527                 .procname       = "core_pipe_limit",
528                 .data           = &core_pipe_limit,
529                 .maxlen         = sizeof(unsigned int),
530                 .mode           = 0644,
531                 .proc_handler   = proc_dointvec,
532         },
533 #endif
534 #ifdef CONFIG_PROC_SYSCTL
535         {
536                 .procname       = "tainted",
537                 .maxlen         = sizeof(long),
538                 .mode           = 0644,
539                 .proc_handler   = proc_taint,
540         },
541         {
542                 .procname       = "sysctl_writes_strict",
543                 .data           = &sysctl_writes_strict,
544                 .maxlen         = sizeof(int),
545                 .mode           = 0644,
546                 .proc_handler   = proc_dointvec_minmax,
547                 .extra1         = &neg_one,
548                 .extra2         = &one,
549         },
550 #endif
551 #ifdef CONFIG_LATENCYTOP
552         {
553                 .procname       = "latencytop",
554                 .data           = &latencytop_enabled,
555                 .maxlen         = sizeof(int),
556                 .mode           = 0644,
557                 .proc_handler   = sysctl_latencytop,
558         },
559 #endif
560 #ifdef CONFIG_BLK_DEV_INITRD
561         {
562                 .procname       = "real-root-dev",
563                 .data           = &real_root_dev,
564                 .maxlen         = sizeof(int),
565                 .mode           = 0644,
566                 .proc_handler   = proc_dointvec,
567         },
568 #endif
569         {
570                 .procname       = "print-fatal-signals",
571                 .data           = &print_fatal_signals,
572                 .maxlen         = sizeof(int),
573                 .mode           = 0644,
574                 .proc_handler   = proc_dointvec,
575         },
576 #ifdef CONFIG_SPARC
577         {
578                 .procname       = "reboot-cmd",
579                 .data           = reboot_command,
580                 .maxlen         = 256,
581                 .mode           = 0644,
582                 .proc_handler   = proc_dostring,
583         },
584         {
585                 .procname       = "stop-a",
586                 .data           = &stop_a_enabled,
587                 .maxlen         = sizeof (int),
588                 .mode           = 0644,
589                 .proc_handler   = proc_dointvec,
590         },
591         {
592                 .procname       = "scons-poweroff",
593                 .data           = &scons_pwroff,
594                 .maxlen         = sizeof (int),
595                 .mode           = 0644,
596                 .proc_handler   = proc_dointvec,
597         },
598 #endif
599 #ifdef CONFIG_SPARC64
600         {
601                 .procname       = "tsb-ratio",
602                 .data           = &sysctl_tsb_ratio,
603                 .maxlen         = sizeof (int),
604                 .mode           = 0644,
605                 .proc_handler   = proc_dointvec,
606         },
607 #endif
608 #ifdef __hppa__
609         {
610                 .procname       = "soft-power",
611                 .data           = &pwrsw_enabled,
612                 .maxlen         = sizeof (int),
613                 .mode           = 0644,
614                 .proc_handler   = proc_dointvec,
615         },
616 #endif
617 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
618         {
619                 .procname       = "unaligned-trap",
620                 .data           = &unaligned_enabled,
621                 .maxlen         = sizeof (int),
622                 .mode           = 0644,
623                 .proc_handler   = proc_dointvec,
624         },
625 #endif
626         {
627                 .procname       = "ctrl-alt-del",
628                 .data           = &C_A_D,
629                 .maxlen         = sizeof(int),
630                 .mode           = 0644,
631                 .proc_handler   = proc_dointvec,
632         },
633 #ifdef CONFIG_FUNCTION_TRACER
634         {
635                 .procname       = "ftrace_enabled",
636                 .data           = &ftrace_enabled,
637                 .maxlen         = sizeof(int),
638                 .mode           = 0644,
639                 .proc_handler   = ftrace_enable_sysctl,
640         },
641 #endif
642 #ifdef CONFIG_STACK_TRACER
643         {
644                 .procname       = "stack_tracer_enabled",
645                 .data           = &stack_tracer_enabled,
646                 .maxlen         = sizeof(int),
647                 .mode           = 0644,
648                 .proc_handler   = stack_trace_sysctl,
649         },
650 #endif
651 #ifdef CONFIG_TRACING
652         {
653                 .procname       = "ftrace_dump_on_oops",
654                 .data           = &ftrace_dump_on_oops,
655                 .maxlen         = sizeof(int),
656                 .mode           = 0644,
657                 .proc_handler   = proc_dointvec,
658         },
659         {
660                 .procname       = "traceoff_on_warning",
661                 .data           = &__disable_trace_on_warning,
662                 .maxlen         = sizeof(__disable_trace_on_warning),
663                 .mode           = 0644,
664                 .proc_handler   = proc_dointvec,
665         },
666         {
667                 .procname       = "tracepoint_printk",
668                 .data           = &tracepoint_printk,
669                 .maxlen         = sizeof(tracepoint_printk),
670                 .mode           = 0644,
671                 .proc_handler   = tracepoint_printk_sysctl,
672         },
673 #endif
674 #ifdef CONFIG_KEXEC_CORE
675         {
676                 .procname       = "kexec_load_disabled",
677                 .data           = &kexec_load_disabled,
678                 .maxlen         = sizeof(int),
679                 .mode           = 0644,
680                 /* only handle a transition from default "0" to "1" */
681                 .proc_handler   = proc_dointvec_minmax,
682                 .extra1         = &one,
683                 .extra2         = &one,
684         },
685 #endif
686 #ifdef CONFIG_MODULES
687         {
688                 .procname       = "modprobe",
689                 .data           = &modprobe_path,
690                 .maxlen         = KMOD_PATH_LEN,
691                 .mode           = 0644,
692                 .proc_handler   = proc_dostring,
693         },
694         {
695                 .procname       = "modules_disabled",
696                 .data           = &modules_disabled,
697                 .maxlen         = sizeof(int),
698                 .mode           = 0644,
699                 /* only handle a transition from default "0" to "1" */
700                 .proc_handler   = proc_dointvec_minmax,
701                 .extra1         = &one,
702                 .extra2         = &one,
703         },
704 #endif
705 #ifdef CONFIG_UEVENT_HELPER
706         {
707                 .procname       = "hotplug",
708                 .data           = &uevent_helper,
709                 .maxlen         = UEVENT_HELPER_PATH_LEN,
710                 .mode           = 0644,
711                 .proc_handler   = proc_dostring,
712         },
713 #endif
714 #ifdef CONFIG_CHR_DEV_SG
715         {
716                 .procname       = "sg-big-buff",
717                 .data           = &sg_big_buff,
718                 .maxlen         = sizeof (int),
719                 .mode           = 0444,
720                 .proc_handler   = proc_dointvec,
721         },
722 #endif
723 #ifdef CONFIG_BSD_PROCESS_ACCT
724         {
725                 .procname       = "acct",
726                 .data           = &acct_parm,
727                 .maxlen         = 3*sizeof(int),
728                 .mode           = 0644,
729                 .proc_handler   = proc_dointvec,
730         },
731 #endif
732 #ifdef CONFIG_MAGIC_SYSRQ
733         {
734                 .procname       = "sysrq",
735                 .data           = &__sysrq_enabled,
736                 .maxlen         = sizeof (int),
737                 .mode           = 0644,
738                 .proc_handler   = sysrq_sysctl_handler,
739         },
740 #endif
741 #ifdef CONFIG_PROC_SYSCTL
742         {
743                 .procname       = "cad_pid",
744                 .data           = NULL,
745                 .maxlen         = sizeof (int),
746                 .mode           = 0600,
747                 .proc_handler   = proc_do_cad_pid,
748         },
749 #endif
750         {
751                 .procname       = "threads-max",
752                 .data           = NULL,
753                 .maxlen         = sizeof(int),
754                 .mode           = 0644,
755                 .proc_handler   = sysctl_max_threads,
756         },
757         {
758                 .procname       = "random",
759                 .mode           = 0555,
760                 .child          = random_table,
761         },
762         {
763                 .procname       = "usermodehelper",
764                 .mode           = 0555,
765                 .child          = usermodehelper_table,
766         },
767 #ifdef CONFIG_FW_LOADER_USER_HELPER
768         {
769                 .procname       = "firmware_config",
770                 .mode           = 0555,
771                 .child          = firmware_config_table,
772         },
773 #endif
774         {
775                 .procname       = "overflowuid",
776                 .data           = &overflowuid,
777                 .maxlen         = sizeof(int),
778                 .mode           = 0644,
779                 .proc_handler   = proc_dointvec_minmax,
780                 .extra1         = &minolduid,
781                 .extra2         = &maxolduid,
782         },
783         {
784                 .procname       = "overflowgid",
785                 .data           = &overflowgid,
786                 .maxlen         = sizeof(int),
787                 .mode           = 0644,
788                 .proc_handler   = proc_dointvec_minmax,
789                 .extra1         = &minolduid,
790                 .extra2         = &maxolduid,
791         },
792 #ifdef CONFIG_S390
793 #ifdef CONFIG_MATHEMU
794         {
795                 .procname       = "ieee_emulation_warnings",
796                 .data           = &sysctl_ieee_emulation_warnings,
797                 .maxlen         = sizeof(int),
798                 .mode           = 0644,
799                 .proc_handler   = proc_dointvec,
800         },
801 #endif
802         {
803                 .procname       = "userprocess_debug",
804                 .data           = &show_unhandled_signals,
805                 .maxlen         = sizeof(int),
806                 .mode           = 0644,
807                 .proc_handler   = proc_dointvec,
808         },
809 #endif
810         {
811                 .procname       = "pid_max",
812                 .data           = &pid_max,
813                 .maxlen         = sizeof (int),
814                 .mode           = 0644,
815                 .proc_handler   = proc_dointvec_minmax,
816                 .extra1         = &pid_max_min,
817                 .extra2         = &pid_max_max,
818         },
819         {
820                 .procname       = "panic_on_oops",
821                 .data           = &panic_on_oops,
822                 .maxlen         = sizeof(int),
823                 .mode           = 0644,
824                 .proc_handler   = proc_dointvec,
825         },
826         {
827                 .procname       = "panic_print",
828                 .data           = &panic_print,
829                 .maxlen         = sizeof(unsigned long),
830                 .mode           = 0644,
831                 .proc_handler   = proc_doulongvec_minmax,
832         },
833 #if defined CONFIG_PRINTK
834         {
835                 .procname       = "printk",
836                 .data           = &console_loglevel,
837                 .maxlen         = 4*sizeof(int),
838                 .mode           = 0644,
839                 .proc_handler   = proc_dointvec,
840         },
841         {
842                 .procname       = "printk_ratelimit",
843                 .data           = &printk_ratelimit_state.interval,
844                 .maxlen         = sizeof(int),
845                 .mode           = 0644,
846                 .proc_handler   = proc_dointvec_jiffies,
847         },
848         {
849                 .procname       = "printk_ratelimit_burst",
850                 .data           = &printk_ratelimit_state.burst,
851                 .maxlen         = sizeof(int),
852                 .mode           = 0644,
853                 .proc_handler   = proc_dointvec,
854         },
855         {
856                 .procname       = "printk_delay",
857                 .data           = &printk_delay_msec,
858                 .maxlen         = sizeof(int),
859                 .mode           = 0644,
860                 .proc_handler   = proc_dointvec_minmax,
861                 .extra1         = &zero,
862                 .extra2         = &ten_thousand,
863         },
864         {
865                 .procname       = "printk_devkmsg",
866                 .data           = devkmsg_log_str,
867                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
868                 .mode           = 0644,
869                 .proc_handler   = devkmsg_sysctl_set_loglvl,
870         },
871         {
872                 .procname       = "dmesg_restrict",
873                 .data           = &dmesg_restrict,
874                 .maxlen         = sizeof(int),
875                 .mode           = 0644,
876                 .proc_handler   = proc_dointvec_minmax_sysadmin,
877                 .extra1         = &zero,
878                 .extra2         = &one,
879         },
880         {
881                 .procname       = "kptr_restrict",
882                 .data           = &kptr_restrict,
883                 .maxlen         = sizeof(int),
884                 .mode           = 0644,
885                 .proc_handler   = proc_dointvec_minmax_sysadmin,
886                 .extra1         = &zero,
887                 .extra2         = &two,
888         },
889 #endif
890         {
891                 .procname       = "ngroups_max",
892                 .data           = &ngroups_max,
893                 .maxlen         = sizeof (int),
894                 .mode           = 0444,
895                 .proc_handler   = proc_dointvec,
896         },
897         {
898                 .procname       = "cap_last_cap",
899                 .data           = (void *)&cap_last_cap,
900                 .maxlen         = sizeof(int),
901                 .mode           = 0444,
902                 .proc_handler   = proc_dointvec,
903         },
904 #if defined(CONFIG_LOCKUP_DETECTOR)
905         {
906                 .procname       = "watchdog",
907                 .data           = &watchdog_user_enabled,
908                 .maxlen         = sizeof(int),
909                 .mode           = 0644,
910                 .proc_handler   = proc_watchdog,
911                 .extra1         = &zero,
912                 .extra2         = &one,
913         },
914         {
915                 .procname       = "watchdog_thresh",
916                 .data           = &watchdog_thresh,
917                 .maxlen         = sizeof(int),
918                 .mode           = 0644,
919                 .proc_handler   = proc_watchdog_thresh,
920                 .extra1         = &zero,
921                 .extra2         = &sixty,
922         },
923         {
924                 .procname       = "nmi_watchdog",
925                 .data           = &nmi_watchdog_user_enabled,
926                 .maxlen         = sizeof(int),
927                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
928                 .proc_handler   = proc_nmi_watchdog,
929                 .extra1         = &zero,
930                 .extra2         = &one,
931         },
932         {
933                 .procname       = "watchdog_cpumask",
934                 .data           = &watchdog_cpumask_bits,
935                 .maxlen         = NR_CPUS,
936                 .mode           = 0644,
937                 .proc_handler   = proc_watchdog_cpumask,
938         },
939 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
940         {
941                 .procname       = "soft_watchdog",
942                 .data           = &soft_watchdog_user_enabled,
943                 .maxlen         = sizeof(int),
944                 .mode           = 0644,
945                 .proc_handler   = proc_soft_watchdog,
946                 .extra1         = &zero,
947                 .extra2         = &one,
948         },
949         {
950                 .procname       = "softlockup_panic",
951                 .data           = &softlockup_panic,
952                 .maxlen         = sizeof(int),
953                 .mode           = 0644,
954                 .proc_handler   = proc_dointvec_minmax,
955                 .extra1         = &zero,
956                 .extra2         = &one,
957         },
958 #ifdef CONFIG_SMP
959         {
960                 .procname       = "softlockup_all_cpu_backtrace",
961                 .data           = &sysctl_softlockup_all_cpu_backtrace,
962                 .maxlen         = sizeof(int),
963                 .mode           = 0644,
964                 .proc_handler   = proc_dointvec_minmax,
965                 .extra1         = &zero,
966                 .extra2         = &one,
967         },
968 #endif /* CONFIG_SMP */
969 #endif
970 #ifdef CONFIG_HARDLOCKUP_DETECTOR
971         {
972                 .procname       = "hardlockup_panic",
973                 .data           = &hardlockup_panic,
974                 .maxlen         = sizeof(int),
975                 .mode           = 0644,
976                 .proc_handler   = proc_dointvec_minmax,
977                 .extra1         = &zero,
978                 .extra2         = &one,
979         },
980 #ifdef CONFIG_SMP
981         {
982                 .procname       = "hardlockup_all_cpu_backtrace",
983                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
984                 .maxlen         = sizeof(int),
985                 .mode           = 0644,
986                 .proc_handler   = proc_dointvec_minmax,
987                 .extra1         = &zero,
988                 .extra2         = &one,
989         },
990 #endif /* CONFIG_SMP */
991 #endif
992 #endif
993
994 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
995         {
996                 .procname       = "unknown_nmi_panic",
997                 .data           = &unknown_nmi_panic,
998                 .maxlen         = sizeof (int),
999                 .mode           = 0644,
1000                 .proc_handler   = proc_dointvec,
1001         },
1002 #endif
1003 #if defined(CONFIG_X86)
1004         {
1005                 .procname       = "panic_on_unrecovered_nmi",
1006                 .data           = &panic_on_unrecovered_nmi,
1007                 .maxlen         = sizeof(int),
1008                 .mode           = 0644,
1009                 .proc_handler   = proc_dointvec,
1010         },
1011         {
1012                 .procname       = "panic_on_io_nmi",
1013                 .data           = &panic_on_io_nmi,
1014                 .maxlen         = sizeof(int),
1015                 .mode           = 0644,
1016                 .proc_handler   = proc_dointvec,
1017         },
1018 #ifdef CONFIG_DEBUG_STACKOVERFLOW
1019         {
1020                 .procname       = "panic_on_stackoverflow",
1021                 .data           = &sysctl_panic_on_stackoverflow,
1022                 .maxlen         = sizeof(int),
1023                 .mode           = 0644,
1024                 .proc_handler   = proc_dointvec,
1025         },
1026 #endif
1027         {
1028                 .procname       = "bootloader_type",
1029                 .data           = &bootloader_type,
1030                 .maxlen         = sizeof (int),
1031                 .mode           = 0444,
1032                 .proc_handler   = proc_dointvec,
1033         },
1034         {
1035                 .procname       = "bootloader_version",
1036                 .data           = &bootloader_version,
1037                 .maxlen         = sizeof (int),
1038                 .mode           = 0444,
1039                 .proc_handler   = proc_dointvec,
1040         },
1041         {
1042                 .procname       = "io_delay_type",
1043                 .data           = &io_delay_type,
1044                 .maxlen         = sizeof(int),
1045                 .mode           = 0644,
1046                 .proc_handler   = proc_dointvec,
1047         },
1048 #endif
1049 #if defined(CONFIG_MMU)
1050         {
1051                 .procname       = "randomize_va_space",
1052                 .data           = &randomize_va_space,
1053                 .maxlen         = sizeof(int),
1054                 .mode           = 0644,
1055                 .proc_handler   = proc_dointvec,
1056         },
1057 #endif
1058 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1059         {
1060                 .procname       = "spin_retry",
1061                 .data           = &spin_retry,
1062                 .maxlen         = sizeof (int),
1063                 .mode           = 0644,
1064                 .proc_handler   = proc_dointvec,
1065         },
1066 #endif
1067 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1068         {
1069                 .procname       = "acpi_video_flags",
1070                 .data           = &acpi_realmode_flags,
1071                 .maxlen         = sizeof (unsigned long),
1072                 .mode           = 0644,
1073                 .proc_handler   = proc_doulongvec_minmax,
1074         },
1075 #endif
1076 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1077         {
1078                 .procname       = "ignore-unaligned-usertrap",
1079                 .data           = &no_unaligned_warning,
1080                 .maxlen         = sizeof (int),
1081                 .mode           = 0644,
1082                 .proc_handler   = proc_dointvec,
1083         },
1084 #endif
1085 #ifdef CONFIG_IA64
1086         {
1087                 .procname       = "unaligned-dump-stack",
1088                 .data           = &unaligned_dump_stack,
1089                 .maxlen         = sizeof (int),
1090                 .mode           = 0644,
1091                 .proc_handler   = proc_dointvec,
1092         },
1093 #endif
1094 #ifdef CONFIG_DETECT_HUNG_TASK
1095         {
1096                 .procname       = "hung_task_panic",
1097                 .data           = &sysctl_hung_task_panic,
1098                 .maxlen         = sizeof(int),
1099                 .mode           = 0644,
1100                 .proc_handler   = proc_dointvec_minmax,
1101                 .extra1         = &zero,
1102                 .extra2         = &one,
1103         },
1104         {
1105                 .procname       = "hung_task_check_count",
1106                 .data           = &sysctl_hung_task_check_count,
1107                 .maxlen         = sizeof(int),
1108                 .mode           = 0644,
1109                 .proc_handler   = proc_dointvec_minmax,
1110                 .extra1         = &zero,
1111         },
1112         {
1113                 .procname       = "hung_task_timeout_secs",
1114                 .data           = &sysctl_hung_task_timeout_secs,
1115                 .maxlen         = sizeof(unsigned long),
1116                 .mode           = 0644,
1117                 .proc_handler   = proc_dohung_task_timeout_secs,
1118                 .extra2         = &hung_task_timeout_max,
1119         },
1120         {
1121                 .procname       = "hung_task_check_interval_secs",
1122                 .data           = &sysctl_hung_task_check_interval_secs,
1123                 .maxlen         = sizeof(unsigned long),
1124                 .mode           = 0644,
1125                 .proc_handler   = proc_dohung_task_timeout_secs,
1126                 .extra2         = &hung_task_timeout_max,
1127         },
1128         {
1129                 .procname       = "hung_task_warnings",
1130                 .data           = &sysctl_hung_task_warnings,
1131                 .maxlen         = sizeof(int),
1132                 .mode           = 0644,
1133                 .proc_handler   = proc_dointvec_minmax,
1134                 .extra1         = &neg_one,
1135         },
1136 #endif
1137 #ifdef CONFIG_RT_MUTEXES
1138         {
1139                 .procname       = "max_lock_depth",
1140                 .data           = &max_lock_depth,
1141                 .maxlen         = sizeof(int),
1142                 .mode           = 0644,
1143                 .proc_handler   = proc_dointvec,
1144         },
1145 #endif
1146         {
1147                 .procname       = "poweroff_cmd",
1148                 .data           = &poweroff_cmd,
1149                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1150                 .mode           = 0644,
1151                 .proc_handler   = proc_dostring,
1152         },
1153 #ifdef CONFIG_KEYS
1154         {
1155                 .procname       = "keys",
1156                 .mode           = 0555,
1157                 .child          = key_sysctls,
1158         },
1159 #endif
1160 #ifdef CONFIG_PERF_EVENTS
1161         /*
1162          * User-space scripts rely on the existence of this file
1163          * as a feature check for perf_events being enabled.
1164          *
1165          * So it's an ABI, do not remove!
1166          */
1167         {
1168                 .procname       = "perf_event_paranoid",
1169                 .data           = &sysctl_perf_event_paranoid,
1170                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1171                 .mode           = 0644,
1172                 .proc_handler   = proc_dointvec,
1173         },
1174         {
1175                 .procname       = "perf_event_mlock_kb",
1176                 .data           = &sysctl_perf_event_mlock,
1177                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1178                 .mode           = 0644,
1179                 .proc_handler   = proc_dointvec,
1180         },
1181         {
1182                 .procname       = "perf_event_max_sample_rate",
1183                 .data           = &sysctl_perf_event_sample_rate,
1184                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1185                 .mode           = 0644,
1186                 .proc_handler   = perf_proc_update_handler,
1187                 .extra1         = &one,
1188         },
1189         {
1190                 .procname       = "perf_cpu_time_max_percent",
1191                 .data           = &sysctl_perf_cpu_time_max_percent,
1192                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1193                 .mode           = 0644,
1194                 .proc_handler   = perf_cpu_time_max_percent_handler,
1195                 .extra1         = &zero,
1196                 .extra2         = &one_hundred,
1197         },
1198         {
1199                 .procname       = "perf_event_max_stack",
1200                 .data           = &sysctl_perf_event_max_stack,
1201                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1202                 .mode           = 0644,
1203                 .proc_handler   = perf_event_max_stack_handler,
1204                 .extra1         = &zero,
1205                 .extra2         = &six_hundred_forty_kb,
1206         },
1207         {
1208                 .procname       = "perf_event_max_contexts_per_stack",
1209                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1210                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1211                 .mode           = 0644,
1212                 .proc_handler   = perf_event_max_stack_handler,
1213                 .extra1         = &zero,
1214                 .extra2         = &one_thousand,
1215         },
1216 #endif
1217         {
1218                 .procname       = "panic_on_warn",
1219                 .data           = &panic_on_warn,
1220                 .maxlen         = sizeof(int),
1221                 .mode           = 0644,
1222                 .proc_handler   = proc_dointvec_minmax,
1223                 .extra1         = &zero,
1224                 .extra2         = &one,
1225         },
1226 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1227         {
1228                 .procname       = "timer_migration",
1229                 .data           = &sysctl_timer_migration,
1230                 .maxlen         = sizeof(unsigned int),
1231                 .mode           = 0644,
1232                 .proc_handler   = timer_migration_handler,
1233                 .extra1         = &zero,
1234                 .extra2         = &one,
1235         },
1236 #endif
1237 #ifdef CONFIG_BPF_SYSCALL
1238         {
1239                 .procname       = "unprivileged_bpf_disabled",
1240                 .data           = &sysctl_unprivileged_bpf_disabled,
1241                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1242                 .mode           = 0644,
1243                 /* only handle a transition from default "0" to "1" */
1244                 .proc_handler   = proc_dointvec_minmax,
1245                 .extra1         = &one,
1246                 .extra2         = &one,
1247         },
1248         {
1249                 .procname       = "bpf_stats_enabled",
1250                 .data           = &sysctl_bpf_stats_enabled,
1251                 .maxlen         = sizeof(sysctl_bpf_stats_enabled),
1252                 .mode           = 0644,
1253                 .proc_handler   = proc_dointvec_minmax_bpf_stats,
1254                 .extra1         = &zero,
1255                 .extra2         = &one,
1256         },
1257 #endif
1258 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1259         {
1260                 .procname       = "panic_on_rcu_stall",
1261                 .data           = &sysctl_panic_on_rcu_stall,
1262                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1263                 .mode           = 0644,
1264                 .proc_handler   = proc_dointvec_minmax,
1265                 .extra1         = &zero,
1266                 .extra2         = &one,
1267         },
1268 #endif
1269 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1270         {
1271                 .procname       = "stack_erasing",
1272                 .data           = NULL,
1273                 .maxlen         = sizeof(int),
1274                 .mode           = 0600,
1275                 .proc_handler   = stack_erasing_sysctl,
1276                 .extra1         = &zero,
1277                 .extra2         = &one,
1278         },
1279 #endif
1280         { }
1281 };
1282
1283 static struct ctl_table vm_table[] = {
1284         {
1285                 .procname       = "overcommit_memory",
1286                 .data           = &sysctl_overcommit_memory,
1287                 .maxlen         = sizeof(sysctl_overcommit_memory),
1288                 .mode           = 0644,
1289                 .proc_handler   = proc_dointvec_minmax,
1290                 .extra1         = &zero,
1291                 .extra2         = &two,
1292         },
1293         {
1294                 .procname       = "panic_on_oom",
1295                 .data           = &sysctl_panic_on_oom,
1296                 .maxlen         = sizeof(sysctl_panic_on_oom),
1297                 .mode           = 0644,
1298                 .proc_handler   = proc_dointvec_minmax,
1299                 .extra1         = &zero,
1300                 .extra2         = &two,
1301         },
1302         {
1303                 .procname       = "oom_kill_allocating_task",
1304                 .data           = &sysctl_oom_kill_allocating_task,
1305                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1306                 .mode           = 0644,
1307                 .proc_handler   = proc_dointvec,
1308         },
1309         {
1310                 .procname       = "oom_dump_tasks",
1311                 .data           = &sysctl_oom_dump_tasks,
1312                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1313                 .mode           = 0644,
1314                 .proc_handler   = proc_dointvec,
1315         },
1316         {
1317                 .procname       = "overcommit_ratio",
1318                 .data           = &sysctl_overcommit_ratio,
1319                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1320                 .mode           = 0644,
1321                 .proc_handler   = overcommit_ratio_handler,
1322         },
1323         {
1324                 .procname       = "overcommit_kbytes",
1325                 .data           = &sysctl_overcommit_kbytes,
1326                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1327                 .mode           = 0644,
1328                 .proc_handler   = overcommit_kbytes_handler,
1329         },
1330         {
1331                 .procname       = "page-cluster", 
1332                 .data           = &page_cluster,
1333                 .maxlen         = sizeof(int),
1334                 .mode           = 0644,
1335                 .proc_handler   = proc_dointvec_minmax,
1336                 .extra1         = &zero,
1337         },
1338         {
1339                 .procname       = "dirty_background_ratio",
1340                 .data           = &dirty_background_ratio,
1341                 .maxlen         = sizeof(dirty_background_ratio),
1342                 .mode           = 0644,
1343                 .proc_handler   = dirty_background_ratio_handler,
1344                 .extra1         = &zero,
1345                 .extra2         = &one_hundred,
1346         },
1347         {
1348                 .procname       = "dirty_background_bytes",
1349                 .data           = &dirty_background_bytes,
1350                 .maxlen         = sizeof(dirty_background_bytes),
1351                 .mode           = 0644,
1352                 .proc_handler   = dirty_background_bytes_handler,
1353                 .extra1         = &one_ul,
1354         },
1355         {
1356                 .procname       = "dirty_ratio",
1357                 .data           = &vm_dirty_ratio,
1358                 .maxlen         = sizeof(vm_dirty_ratio),
1359                 .mode           = 0644,
1360                 .proc_handler   = dirty_ratio_handler,
1361                 .extra1         = &zero,
1362                 .extra2         = &one_hundred,
1363         },
1364         {
1365                 .procname       = "dirty_bytes",
1366                 .data           = &vm_dirty_bytes,
1367                 .maxlen         = sizeof(vm_dirty_bytes),
1368                 .mode           = 0644,
1369                 .proc_handler   = dirty_bytes_handler,
1370                 .extra1         = &dirty_bytes_min,
1371         },
1372         {
1373                 .procname       = "dirty_writeback_centisecs",
1374                 .data           = &dirty_writeback_interval,
1375                 .maxlen         = sizeof(dirty_writeback_interval),
1376                 .mode           = 0644,
1377                 .proc_handler   = dirty_writeback_centisecs_handler,
1378         },
1379         {
1380                 .procname       = "dirty_expire_centisecs",
1381                 .data           = &dirty_expire_interval,
1382                 .maxlen         = sizeof(dirty_expire_interval),
1383                 .mode           = 0644,
1384                 .proc_handler   = proc_dointvec_minmax,
1385                 .extra1         = &zero,
1386         },
1387         {
1388                 .procname       = "dirtytime_expire_seconds",
1389                 .data           = &dirtytime_expire_interval,
1390                 .maxlen         = sizeof(dirtytime_expire_interval),
1391                 .mode           = 0644,
1392                 .proc_handler   = dirtytime_interval_handler,
1393                 .extra1         = &zero,
1394         },
1395         {
1396                 .procname       = "swappiness",
1397                 .data           = &vm_swappiness,
1398                 .maxlen         = sizeof(vm_swappiness),
1399                 .mode           = 0644,
1400                 .proc_handler   = proc_dointvec_minmax,
1401                 .extra1         = &zero,
1402                 .extra2         = &one_hundred,
1403         },
1404 #ifdef CONFIG_HUGETLB_PAGE
1405         {
1406                 .procname       = "nr_hugepages",
1407                 .data           = NULL,
1408                 .maxlen         = sizeof(unsigned long),
1409                 .mode           = 0644,
1410                 .proc_handler   = hugetlb_sysctl_handler,
1411         },
1412 #ifdef CONFIG_NUMA
1413         {
1414                 .procname       = "nr_hugepages_mempolicy",
1415                 .data           = NULL,
1416                 .maxlen         = sizeof(unsigned long),
1417                 .mode           = 0644,
1418                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1419         },
1420         {
1421                 .procname               = "numa_stat",
1422                 .data                   = &sysctl_vm_numa_stat,
1423                 .maxlen                 = sizeof(int),
1424                 .mode                   = 0644,
1425                 .proc_handler   = sysctl_vm_numa_stat_handler,
1426                 .extra1                 = &zero,
1427                 .extra2                 = &one,
1428         },
1429 #endif
1430          {
1431                 .procname       = "hugetlb_shm_group",
1432                 .data           = &sysctl_hugetlb_shm_group,
1433                 .maxlen         = sizeof(gid_t),
1434                 .mode           = 0644,
1435                 .proc_handler   = proc_dointvec,
1436          },
1437         {
1438                 .procname       = "nr_overcommit_hugepages",
1439                 .data           = NULL,
1440                 .maxlen         = sizeof(unsigned long),
1441                 .mode           = 0644,
1442                 .proc_handler   = hugetlb_overcommit_handler,
1443         },
1444 #endif
1445         {
1446                 .procname       = "lowmem_reserve_ratio",
1447                 .data           = &sysctl_lowmem_reserve_ratio,
1448                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1449                 .mode           = 0644,
1450                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1451         },
1452         {
1453                 .procname       = "drop_caches",
1454                 .data           = &sysctl_drop_caches,
1455                 .maxlen         = sizeof(int),
1456                 .mode           = 0644,
1457                 .proc_handler   = drop_caches_sysctl_handler,
1458                 .extra1         = &one,
1459                 .extra2         = &four,
1460         },
1461 #ifdef CONFIG_COMPACTION
1462         {
1463                 .procname       = "compact_memory",
1464                 .data           = &sysctl_compact_memory,
1465                 .maxlen         = sizeof(int),
1466                 .mode           = 0200,
1467                 .proc_handler   = sysctl_compaction_handler,
1468         },
1469         {
1470                 .procname       = "extfrag_threshold",
1471                 .data           = &sysctl_extfrag_threshold,
1472                 .maxlen         = sizeof(int),
1473                 .mode           = 0644,
1474                 .proc_handler   = sysctl_extfrag_handler,
1475                 .extra1         = &min_extfrag_threshold,
1476                 .extra2         = &max_extfrag_threshold,
1477         },
1478         {
1479                 .procname       = "compact_unevictable_allowed",
1480                 .data           = &sysctl_compact_unevictable_allowed,
1481                 .maxlen         = sizeof(int),
1482                 .mode           = 0644,
1483                 .proc_handler   = proc_dointvec,
1484                 .extra1         = &zero,
1485                 .extra2         = &one,
1486         },
1487
1488 #endif /* CONFIG_COMPACTION */
1489         {
1490                 .procname       = "min_free_kbytes",
1491                 .data           = &min_free_kbytes,
1492                 .maxlen         = sizeof(min_free_kbytes),
1493                 .mode           = 0644,
1494                 .proc_handler   = min_free_kbytes_sysctl_handler,
1495                 .extra1         = &zero,
1496         },
1497         {
1498                 .procname       = "watermark_boost_factor",
1499                 .data           = &watermark_boost_factor,
1500                 .maxlen         = sizeof(watermark_boost_factor),
1501                 .mode           = 0644,
1502                 .proc_handler   = watermark_boost_factor_sysctl_handler,
1503                 .extra1         = &zero,
1504         },
1505         {
1506                 .procname       = "watermark_scale_factor",
1507                 .data           = &watermark_scale_factor,
1508                 .maxlen         = sizeof(watermark_scale_factor),
1509                 .mode           = 0644,
1510                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1511                 .extra1         = &one,
1512                 .extra2         = &one_thousand,
1513         },
1514         {
1515                 .procname       = "percpu_pagelist_fraction",
1516                 .data           = &percpu_pagelist_fraction,
1517                 .maxlen         = sizeof(percpu_pagelist_fraction),
1518                 .mode           = 0644,
1519                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1520                 .extra1         = &zero,
1521         },
1522 #ifdef CONFIG_MMU
1523         {
1524                 .procname       = "max_map_count",
1525                 .data           = &sysctl_max_map_count,
1526                 .maxlen         = sizeof(sysctl_max_map_count),
1527                 .mode           = 0644,
1528                 .proc_handler   = proc_dointvec_minmax,
1529                 .extra1         = &zero,
1530         },
1531 #else
1532         {
1533                 .procname       = "nr_trim_pages",
1534                 .data           = &sysctl_nr_trim_pages,
1535                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1536                 .mode           = 0644,
1537                 .proc_handler   = proc_dointvec_minmax,
1538                 .extra1         = &zero,
1539         },
1540 #endif
1541         {
1542                 .procname       = "laptop_mode",
1543                 .data           = &laptop_mode,
1544                 .maxlen         = sizeof(laptop_mode),
1545                 .mode           = 0644,
1546                 .proc_handler   = proc_dointvec_jiffies,
1547         },
1548         {
1549                 .procname       = "block_dump",
1550                 .data           = &block_dump,
1551                 .maxlen         = sizeof(block_dump),
1552                 .mode           = 0644,
1553                 .proc_handler   = proc_dointvec,
1554                 .extra1         = &zero,
1555         },
1556         {
1557                 .procname       = "vfs_cache_pressure",
1558                 .data           = &sysctl_vfs_cache_pressure,
1559                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1560                 .mode           = 0644,
1561                 .proc_handler   = proc_dointvec,
1562                 .extra1         = &zero,
1563         },
1564 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1565         {
1566                 .procname       = "legacy_va_layout",
1567                 .data           = &sysctl_legacy_va_layout,
1568                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1569                 .mode           = 0644,
1570                 .proc_handler   = proc_dointvec,
1571                 .extra1         = &zero,
1572         },
1573 #endif
1574 #ifdef CONFIG_NUMA
1575         {
1576                 .procname       = "zone_reclaim_mode",
1577                 .data           = &node_reclaim_mode,
1578                 .maxlen         = sizeof(node_reclaim_mode),
1579                 .mode           = 0644,
1580                 .proc_handler   = proc_dointvec,
1581                 .extra1         = &zero,
1582         },
1583         {
1584                 .procname       = "min_unmapped_ratio",
1585                 .data           = &sysctl_min_unmapped_ratio,
1586                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1587                 .mode           = 0644,
1588                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1589                 .extra1         = &zero,
1590                 .extra2         = &one_hundred,
1591         },
1592         {
1593                 .procname       = "min_slab_ratio",
1594                 .data           = &sysctl_min_slab_ratio,
1595                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1596                 .mode           = 0644,
1597                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1598                 .extra1         = &zero,
1599                 .extra2         = &one_hundred,
1600         },
1601 #endif
1602 #ifdef CONFIG_SMP
1603         {
1604                 .procname       = "stat_interval",
1605                 .data           = &sysctl_stat_interval,
1606                 .maxlen         = sizeof(sysctl_stat_interval),
1607                 .mode           = 0644,
1608                 .proc_handler   = proc_dointvec_jiffies,
1609         },
1610         {
1611                 .procname       = "stat_refresh",
1612                 .data           = NULL,
1613                 .maxlen         = 0,
1614                 .mode           = 0600,
1615                 .proc_handler   = vmstat_refresh,
1616         },
1617 #endif
1618 #ifdef CONFIG_MMU
1619         {
1620                 .procname       = "mmap_min_addr",
1621                 .data           = &dac_mmap_min_addr,
1622                 .maxlen         = sizeof(unsigned long),
1623                 .mode           = 0644,
1624                 .proc_handler   = mmap_min_addr_handler,
1625         },
1626 #endif
1627 #ifdef CONFIG_NUMA
1628         {
1629                 .procname       = "numa_zonelist_order",
1630                 .data           = &numa_zonelist_order,
1631                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1632                 .mode           = 0644,
1633                 .proc_handler   = numa_zonelist_order_handler,
1634         },
1635 #endif
1636 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1637    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1638         {
1639                 .procname       = "vdso_enabled",
1640 #ifdef CONFIG_X86_32
1641                 .data           = &vdso32_enabled,
1642                 .maxlen         = sizeof(vdso32_enabled),
1643 #else
1644                 .data           = &vdso_enabled,
1645                 .maxlen         = sizeof(vdso_enabled),
1646 #endif
1647                 .mode           = 0644,
1648                 .proc_handler   = proc_dointvec,
1649                 .extra1         = &zero,
1650         },
1651 #endif
1652 #ifdef CONFIG_HIGHMEM
1653         {
1654                 .procname       = "highmem_is_dirtyable",
1655                 .data           = &vm_highmem_is_dirtyable,
1656                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1657                 .mode           = 0644,
1658                 .proc_handler   = proc_dointvec_minmax,
1659                 .extra1         = &zero,
1660                 .extra2         = &one,
1661         },
1662 #endif
1663 #ifdef CONFIG_MEMORY_FAILURE
1664         {
1665                 .procname       = "memory_failure_early_kill",
1666                 .data           = &sysctl_memory_failure_early_kill,
1667                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1668                 .mode           = 0644,
1669                 .proc_handler   = proc_dointvec_minmax,
1670                 .extra1         = &zero,
1671                 .extra2         = &one,
1672         },
1673         {
1674                 .procname       = "memory_failure_recovery",
1675                 .data           = &sysctl_memory_failure_recovery,
1676                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1677                 .mode           = 0644,
1678                 .proc_handler   = proc_dointvec_minmax,
1679                 .extra1         = &zero,
1680                 .extra2         = &one,
1681         },
1682 #endif
1683         {
1684                 .procname       = "user_reserve_kbytes",
1685                 .data           = &sysctl_user_reserve_kbytes,
1686                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1687                 .mode           = 0644,
1688                 .proc_handler   = proc_doulongvec_minmax,
1689         },
1690         {
1691                 .procname       = "admin_reserve_kbytes",
1692                 .data           = &sysctl_admin_reserve_kbytes,
1693                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1694                 .mode           = 0644,
1695                 .proc_handler   = proc_doulongvec_minmax,
1696         },
1697 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1698         {
1699                 .procname       = "mmap_rnd_bits",
1700                 .data           = &mmap_rnd_bits,
1701                 .maxlen         = sizeof(mmap_rnd_bits),
1702                 .mode           = 0600,
1703                 .proc_handler   = proc_dointvec_minmax,
1704                 .extra1         = (void *)&mmap_rnd_bits_min,
1705                 .extra2         = (void *)&mmap_rnd_bits_max,
1706         },
1707 #endif
1708 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1709         {
1710                 .procname       = "mmap_rnd_compat_bits",
1711                 .data           = &mmap_rnd_compat_bits,
1712                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1713                 .mode           = 0600,
1714                 .proc_handler   = proc_dointvec_minmax,
1715                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1716                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1717         },
1718 #endif
1719         { }
1720 };
1721
1722 static struct ctl_table fs_table[] = {
1723         {
1724                 .procname       = "inode-nr",
1725                 .data           = &inodes_stat,
1726                 .maxlen         = 2*sizeof(long),
1727                 .mode           = 0444,
1728                 .proc_handler   = proc_nr_inodes,
1729         },
1730         {
1731                 .procname       = "inode-state",
1732                 .data           = &inodes_stat,
1733                 .maxlen         = 7*sizeof(long),
1734                 .mode           = 0444,
1735                 .proc_handler   = proc_nr_inodes,
1736         },
1737         {
1738                 .procname       = "file-nr",
1739                 .data           = &files_stat,
1740                 .maxlen         = sizeof(files_stat),
1741                 .mode           = 0444,
1742                 .proc_handler   = proc_nr_files,
1743         },
1744         {
1745                 .procname       = "file-max",
1746                 .data           = &files_stat.max_files,
1747                 .maxlen         = sizeof(files_stat.max_files),
1748                 .mode           = 0644,
1749                 .proc_handler   = proc_doulongvec_minmax,
1750         },
1751         {
1752                 .procname       = "nr_open",
1753                 .data           = &sysctl_nr_open,
1754                 .maxlen         = sizeof(unsigned int),
1755                 .mode           = 0644,
1756                 .proc_handler   = proc_dointvec_minmax,
1757                 .extra1         = &sysctl_nr_open_min,
1758                 .extra2         = &sysctl_nr_open_max,
1759         },
1760         {
1761                 .procname       = "dentry-state",
1762                 .data           = &dentry_stat,
1763                 .maxlen         = 6*sizeof(long),
1764                 .mode           = 0444,
1765                 .proc_handler   = proc_nr_dentry,
1766         },
1767         {
1768                 .procname       = "overflowuid",
1769                 .data           = &fs_overflowuid,
1770                 .maxlen         = sizeof(int),
1771                 .mode           = 0644,
1772                 .proc_handler   = proc_dointvec_minmax,
1773                 .extra1         = &minolduid,
1774                 .extra2         = &maxolduid,
1775         },
1776         {
1777                 .procname       = "overflowgid",
1778                 .data           = &fs_overflowgid,
1779                 .maxlen         = sizeof(int),
1780                 .mode           = 0644,
1781                 .proc_handler   = proc_dointvec_minmax,
1782                 .extra1         = &minolduid,
1783                 .extra2         = &maxolduid,
1784         },
1785 #ifdef CONFIG_FILE_LOCKING
1786         {
1787                 .procname       = "leases-enable",
1788                 .data           = &leases_enable,
1789                 .maxlen         = sizeof(int),
1790                 .mode           = 0644,
1791                 .proc_handler   = proc_dointvec,
1792         },
1793 #endif
1794 #ifdef CONFIG_DNOTIFY
1795         {
1796                 .procname       = "dir-notify-enable",
1797                 .data           = &dir_notify_enable,
1798                 .maxlen         = sizeof(int),
1799                 .mode           = 0644,
1800                 .proc_handler   = proc_dointvec,
1801         },
1802 #endif
1803 #ifdef CONFIG_MMU
1804 #ifdef CONFIG_FILE_LOCKING
1805         {
1806                 .procname       = "lease-break-time",
1807                 .data           = &lease_break_time,
1808                 .maxlen         = sizeof(int),
1809                 .mode           = 0644,
1810                 .proc_handler   = proc_dointvec,
1811         },
1812 #endif
1813 #ifdef CONFIG_AIO
1814         {
1815                 .procname       = "aio-nr",
1816                 .data           = &aio_nr,
1817                 .maxlen         = sizeof(aio_nr),
1818                 .mode           = 0444,
1819                 .proc_handler   = proc_doulongvec_minmax,
1820         },
1821         {
1822                 .procname       = "aio-max-nr",
1823                 .data           = &aio_max_nr,
1824                 .maxlen         = sizeof(aio_max_nr),
1825                 .mode           = 0644,
1826                 .proc_handler   = proc_doulongvec_minmax,
1827         },
1828 #endif /* CONFIG_AIO */
1829 #ifdef CONFIG_INOTIFY_USER
1830         {
1831                 .procname       = "inotify",
1832                 .mode           = 0555,
1833                 .child          = inotify_table,
1834         },
1835 #endif  
1836 #ifdef CONFIG_EPOLL
1837         {
1838                 .procname       = "epoll",
1839                 .mode           = 0555,
1840                 .child          = epoll_table,
1841         },
1842 #endif
1843 #endif
1844         {
1845                 .procname       = "protected_symlinks",
1846                 .data           = &sysctl_protected_symlinks,
1847                 .maxlen         = sizeof(int),
1848                 .mode           = 0600,
1849                 .proc_handler   = proc_dointvec_minmax,
1850                 .extra1         = &zero,
1851                 .extra2         = &one,
1852         },
1853         {
1854                 .procname       = "protected_hardlinks",
1855                 .data           = &sysctl_protected_hardlinks,
1856                 .maxlen         = sizeof(int),
1857                 .mode           = 0600,
1858                 .proc_handler   = proc_dointvec_minmax,
1859                 .extra1         = &zero,
1860                 .extra2         = &one,
1861         },
1862         {
1863                 .procname       = "protected_fifos",
1864                 .data           = &sysctl_protected_fifos,
1865                 .maxlen         = sizeof(int),
1866                 .mode           = 0600,
1867                 .proc_handler   = proc_dointvec_minmax,
1868                 .extra1         = &zero,
1869                 .extra2         = &two,
1870         },
1871         {
1872                 .procname       = "protected_regular",
1873                 .data           = &sysctl_protected_regular,
1874                 .maxlen         = sizeof(int),
1875                 .mode           = 0600,
1876                 .proc_handler   = proc_dointvec_minmax,
1877                 .extra1         = &zero,
1878                 .extra2         = &two,
1879         },
1880         {
1881                 .procname       = "suid_dumpable",
1882                 .data           = &suid_dumpable,
1883                 .maxlen         = sizeof(int),
1884                 .mode           = 0644,
1885                 .proc_handler   = proc_dointvec_minmax_coredump,
1886                 .extra1         = &zero,
1887                 .extra2         = &two,
1888         },
1889 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1890         {
1891                 .procname       = "binfmt_misc",
1892                 .mode           = 0555,
1893                 .child          = sysctl_mount_point,
1894         },
1895 #endif
1896         {
1897                 .procname       = "pipe-max-size",
1898                 .data           = &pipe_max_size,
1899                 .maxlen         = sizeof(pipe_max_size),
1900                 .mode           = 0644,
1901                 .proc_handler   = proc_dopipe_max_size,
1902         },
1903         {
1904                 .procname       = "pipe-user-pages-hard",
1905                 .data           = &pipe_user_pages_hard,
1906                 .maxlen         = sizeof(pipe_user_pages_hard),
1907                 .mode           = 0644,
1908                 .proc_handler   = proc_doulongvec_minmax,
1909         },
1910         {
1911                 .procname       = "pipe-user-pages-soft",
1912                 .data           = &pipe_user_pages_soft,
1913                 .maxlen         = sizeof(pipe_user_pages_soft),
1914                 .mode           = 0644,
1915                 .proc_handler   = proc_doulongvec_minmax,
1916         },
1917         {
1918                 .procname       = "mount-max",
1919                 .data           = &sysctl_mount_max,
1920                 .maxlen         = sizeof(unsigned int),
1921                 .mode           = 0644,
1922                 .proc_handler   = proc_dointvec_minmax,
1923                 .extra1         = &one,
1924         },
1925         { }
1926 };
1927
1928 static struct ctl_table debug_table[] = {
1929 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1930         {
1931                 .procname       = "exception-trace",
1932                 .data           = &show_unhandled_signals,
1933                 .maxlen         = sizeof(int),
1934                 .mode           = 0644,
1935                 .proc_handler   = proc_dointvec
1936         },
1937 #endif
1938 #if defined(CONFIG_OPTPROBES)
1939         {
1940                 .procname       = "kprobes-optimization",
1941                 .data           = &sysctl_kprobes_optimization,
1942                 .maxlen         = sizeof(int),
1943                 .mode           = 0644,
1944                 .proc_handler   = proc_kprobes_optimization_handler,
1945                 .extra1         = &zero,
1946                 .extra2         = &one,
1947         },
1948 #endif
1949         { }
1950 };
1951
1952 static struct ctl_table dev_table[] = {
1953         { }
1954 };
1955
1956 int __init sysctl_init(void)
1957 {
1958         struct ctl_table_header *hdr;
1959
1960         hdr = register_sysctl_table(sysctl_base_table);
1961         kmemleak_not_leak(hdr);
1962         return 0;
1963 }
1964
1965 #endif /* CONFIG_SYSCTL */
1966
1967 /*
1968  * /proc/sys support
1969  */
1970
1971 #ifdef CONFIG_PROC_SYSCTL
1972
1973 static int _proc_do_string(char *data, int maxlen, int write,
1974                            char __user *buffer,
1975                            size_t *lenp, loff_t *ppos)
1976 {
1977         size_t len;
1978         char __user *p;
1979         char c;
1980
1981         if (!data || !maxlen || !*lenp) {
1982                 *lenp = 0;
1983                 return 0;
1984         }
1985
1986         if (write) {
1987                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1988                         /* Only continue writes not past the end of buffer. */
1989                         len = strlen(data);
1990                         if (len > maxlen - 1)
1991                                 len = maxlen - 1;
1992
1993                         if (*ppos > len)
1994                                 return 0;
1995                         len = *ppos;
1996                 } else {
1997                         /* Start writing from beginning of buffer. */
1998                         len = 0;
1999                 }
2000
2001                 *ppos += *lenp;
2002                 p = buffer;
2003                 while ((p - buffer) < *lenp && len < maxlen - 1) {
2004                         if (get_user(c, p++))
2005                                 return -EFAULT;
2006                         if (c == 0 || c == '\n')
2007                                 break;
2008                         data[len++] = c;
2009                 }
2010                 data[len] = 0;
2011         } else {
2012                 len = strlen(data);
2013                 if (len > maxlen)
2014                         len = maxlen;
2015
2016                 if (*ppos > len) {
2017                         *lenp = 0;
2018                         return 0;
2019                 }
2020
2021                 data += *ppos;
2022                 len  -= *ppos;
2023
2024                 if (len > *lenp)
2025                         len = *lenp;
2026                 if (len)
2027                         if (copy_to_user(buffer, data, len))
2028                                 return -EFAULT;
2029                 if (len < *lenp) {
2030                         if (put_user('\n', buffer + len))
2031                                 return -EFAULT;
2032                         len++;
2033                 }
2034                 *lenp = len;
2035                 *ppos += len;
2036         }
2037         return 0;
2038 }
2039
2040 static void warn_sysctl_write(struct ctl_table *table)
2041 {
2042         pr_warn_once("%s wrote to %s when file position was not 0!\n"
2043                 "This will not be supported in the future. To silence this\n"
2044                 "warning, set kernel.sysctl_writes_strict = -1\n",
2045                 current->comm, table->procname);
2046 }
2047
2048 /**
2049  * proc_first_pos_non_zero_ignore - check if first position is allowed
2050  * @ppos: file position
2051  * @table: the sysctl table
2052  *
2053  * Returns true if the first position is non-zero and the sysctl_writes_strict
2054  * mode indicates this is not allowed for numeric input types. String proc
2055  * handlers can ignore the return value.
2056  */
2057 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2058                                            struct ctl_table *table)
2059 {
2060         if (!*ppos)
2061                 return false;
2062
2063         switch (sysctl_writes_strict) {
2064         case SYSCTL_WRITES_STRICT:
2065                 return true;
2066         case SYSCTL_WRITES_WARN:
2067                 warn_sysctl_write(table);
2068                 return false;
2069         default:
2070                 return false;
2071         }
2072 }
2073
2074 /**
2075  * proc_dostring - read a string sysctl
2076  * @table: the sysctl table
2077  * @write: %TRUE if this is a write to the sysctl file
2078  * @buffer: the user buffer
2079  * @lenp: the size of the user buffer
2080  * @ppos: file position
2081  *
2082  * Reads/writes a string from/to the user buffer. If the kernel
2083  * buffer provided is not large enough to hold the string, the
2084  * string is truncated. The copied string is %NULL-terminated.
2085  * If the string is being read by the user process, it is copied
2086  * and a newline '\n' is added. It is truncated if the buffer is
2087  * not large enough.
2088  *
2089  * Returns 0 on success.
2090  */
2091 int proc_dostring(struct ctl_table *table, int write,
2092                   void __user *buffer, size_t *lenp, loff_t *ppos)
2093 {
2094         if (write)
2095                 proc_first_pos_non_zero_ignore(ppos, table);
2096
2097         return _proc_do_string((char *)(table->data), table->maxlen, write,
2098                                (char __user *)buffer, lenp, ppos);
2099 }
2100
/*
 * Advance *buf to the position returned by skip_spaces() and return the
 * number of bytes that were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
        char *start = *buf;

        *buf = skip_spaces(start);
        return *buf - start;
}
2109
/*
 * Skip a leading run of the character @v: *buf is advanced and *size is
 * decremented once for every skipped byte.  Stops at the first byte that
 * differs from @v or when *size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
        for (; *size && **buf == v; ++*buf)
                --*size;
}
2119
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the magnitude of the number will be stored
 * @neg: set to %TRUE if the number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. On success %0 is
 * returned and @buf and @size are advanced past the bytes that were parsed.
 * If @tr is non-NULL and input remains after the number, @tr receives the
 * character following it. -EINVAL is returned for empty input, for input
 * that does not start with a digit (after an optional '-'), for a number
 * that fills the whole temporary buffer, and for a trailer that is not in
 * @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
                          unsigned long *val, bool *neg,
                          const char *perm_tr, unsigned perm_tr_len, char *tr)
{
        char tmp[TMPBUFLEN];
        char *cur = tmp;
        int len;

        if (!*size)
                return -EINVAL;

        /* Work on a bounded, NUL-terminated copy of the input. */
        len = *size;
        if (len > TMPBUFLEN - 1)
                len = TMPBUFLEN - 1;
        memcpy(tmp, *buf, len);
        tmp[len] = 0;

        /* A lone '-' is not treated as a sign. */
        *neg = (*cur == '-' && *size > 1);
        if (*neg)
                cur++;

        if (!isdigit(*cur))
                return -EINVAL;

        /*
         * NOTE(review): the parsed value is stored as returned; whether
         * simple_strtoul() reports overflow is not visible here - confirm.
         */
        *val = simple_strtoul(cur, &cur, 0);

        len = cur - tmp;

        /* We don't know if the next char is whitespace thus we may accept
         * invalid integers (e.g. 1234...a) or two integers instead of one
         * (e.g. 123...1). So lets not allow such large numbers. */
        if (len == TMPBUFLEN - 1)
                return -EINVAL;

        if (len < *size) {
                /* Input continues: validate and report the trailer. */
                if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
                        return -EINVAL;
                if (tr)
                        *tr = *cur;
        }

        *buf += len;
        *size -= len;

        return 0;
}
2184
2185 /**
2186  * proc_put_long - converts an integer to a decimal ASCII formatted string
2187  *
2188  * @buf: the user buffer
2189  * @size: the size of the user buffer
2190  * @val: the integer to be converted
2191  * @neg: sign of the number, %TRUE for negative
2192  *
2193  * In case of success %0 is returned and @buf and @size are updated with
2194  * the amount of bytes written.
2195  */
2196 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2197                           bool neg)
2198 {
2199         int len;
2200         char tmp[TMPBUFLEN], *p = tmp;
2201
2202         sprintf(p, "%s%lu", neg ? "-" : "", val);
2203         len = strlen(tmp);
2204         if (len > *size)
2205                 len = *size;
2206         if (copy_to_user(*buf, tmp, len))
2207                 return -EFAULT;
2208         *size -= len;
2209         *buf += len;
2210         return 0;
2211 }
2212 #undef TMPBUFLEN
2213
2214 static int proc_put_char(void __user **buf, size_t *size, char c)
2215 {
2216         if (*size) {
2217                 char __user **buffer = (char __user **)buf;
2218                 if (put_user(c, *buffer))
2219                         return -EFAULT;
2220                 (*size)--, (*buffer)++;
2221                 *buf = *buffer;
2222         }
2223         return 0;
2224 }
2225
/*
 * Default conversion between an int and its sign/magnitude form.
 *
 * write != 0: build *valp from the sign *negp and the magnitude *lvalp,
 *             returning -EINVAL if the result would not fit in an int.
 * write == 0: split *valp into *negp and *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                 int *valp,
                                 int write, void *data)
{
        if (!write) {
                int val = *valp;

                *negp = val < 0;
                /* Negate in unsigned arithmetic so INT_MIN is handled. */
                *lvalp = *negp ? -(unsigned long)val : (unsigned long)val;
                return 0;
        }

        if (*negp) {
                /* The most negative int has magnitude INT_MAX + 1. */
                if (*lvalp > (unsigned long) INT_MAX + 1)
                        return -EINVAL;
                *valp = -*lvalp;
                return 0;
        }

        if (*lvalp > (unsigned long) INT_MAX)
                return -EINVAL;
        *valp = *lvalp;
        return 0;
}
2252
/*
 * Default conversion between an unsigned int and an unsigned long.
 *
 * write != 0: store *lvalp into *valp, or return -EINVAL if it exceeds
 *             UINT_MAX.
 * write == 0: widen *valp into *lvalp.
 *
 * @data is unused.  Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                                  unsigned int *valp,
                                  int write, void *data)
{
        if (!write) {
                unsigned int val = *valp;

                *lvalp = (unsigned long)val;
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;

        *valp = *lvalp;
        return 0;
}
2267
/*
 * Characters accepted as a trailer after a parsed number.  This is a plain
 * character array, not a string: it has no terminating NUL, so
 * sizeof(proc_wspace_sep) is exactly the number of separators.
 */
static const char proc_wspace_sep[] = {
        ' ',
        '\t',
        '\n',
};
2269
2270 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2271                   int write, void __user *buffer,
2272                   size_t *lenp, loff_t *ppos,
2273                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2274                               int write, void *data),
2275                   void *data)
2276 {
2277         int *i, vleft, first = 1, err = 0;
2278         size_t left;
2279         char *kbuf = NULL, *p;
2280         
2281         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2282                 *lenp = 0;
2283                 return 0;
2284         }
2285         
2286         i = (int *) tbl_data;
2287         vleft = table->maxlen / sizeof(*i);
2288         left = *lenp;
2289
2290         if (!conv)
2291                 conv = do_proc_dointvec_conv;
2292
2293         if (write) {
2294                 if (proc_first_pos_non_zero_ignore(ppos, table))
2295                         goto out;
2296
2297                 if (left > PAGE_SIZE - 1)
2298                         left = PAGE_SIZE - 1;
2299                 p = kbuf = memdup_user_nul(buffer, left);
2300                 if (IS_ERR(kbuf))
2301                         return PTR_ERR(kbuf);
2302         }
2303
2304         for (; left && vleft--; i++, first=0) {
2305                 unsigned long lval;
2306                 bool neg;
2307
2308                 if (write) {
2309                         left -= proc_skip_spaces(&p);
2310
2311                         if (!left)
2312                                 break;
2313                         err = proc_get_long(&p, &left, &lval, &neg,
2314                                              proc_wspace_sep,
2315                                              sizeof(proc_wspace_sep), NULL);
2316                         if (err)
2317                                 break;
2318                         if (conv(&neg, &lval, i, 1, data)) {
2319                                 err = -EINVAL;
2320                                 break;
2321                         }
2322                 } else {
2323                         if (conv(&neg, &lval, i, 0, data)) {
2324                                 err = -EINVAL;
2325                                 break;
2326                         }
2327                         if (!first)
2328                                 err = proc_put_char(&buffer, &left, '\t');
2329                         if (err)
2330                                 break;
2331                         err = proc_put_long(&buffer, &left, lval, neg);
2332                         if (err)
2333                                 break;
2334                 }
2335         }
2336
2337         if (!write && !first && left && !err)
2338                 err = proc_put_char(&buffer, &left, '\n');
2339         if (write && !err && left)
2340                 left -= proc_skip_spaces(&p);
2341         if (write) {
2342                 kfree(kbuf);
2343                 if (first)
2344                         return err ? : -EINVAL;
2345         }
2346         *lenp -= left;
2347 out:
2348         *ppos += *lenp;
2349         return err;
2350 }
2351
2352 static int do_proc_dointvec(struct ctl_table *table, int write,
2353                   void __user *buffer, size_t *lenp, loff_t *ppos,
2354                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2355                               int write, void *data),
2356                   void *data)
2357 {
2358         return __do_proc_dointvec(table->data, table, write,
2359                         buffer, lenp, ppos, conv, data);
2360 }
2361
2362 static int do_proc_douintvec_w(unsigned int *tbl_data,
2363                                struct ctl_table *table,
2364                                void __user *buffer,
2365                                size_t *lenp, loff_t *ppos,
2366                                int (*conv)(unsigned long *lvalp,
2367                                            unsigned int *valp,
2368                                            int write, void *data),
2369                                void *data)
2370 {
2371         unsigned long lval;
2372         int err = 0;
2373         size_t left;
2374         bool neg;
2375         char *kbuf = NULL, *p;
2376
2377         left = *lenp;
2378
2379         if (proc_first_pos_non_zero_ignore(ppos, table))
2380                 goto bail_early;
2381
2382         if (left > PAGE_SIZE - 1)
2383                 left = PAGE_SIZE - 1;
2384
2385         p = kbuf = memdup_user_nul(buffer, left);
2386         if (IS_ERR(kbuf))
2387                 return -EINVAL;
2388
2389         left -= proc_skip_spaces(&p);
2390         if (!left) {
2391                 err = -EINVAL;
2392                 goto out_free;
2393         }
2394
2395         err = proc_get_long(&p, &left, &lval, &neg,
2396                              proc_wspace_sep,
2397                              sizeof(proc_wspace_sep), NULL);
2398         if (err || neg) {
2399                 err = -EINVAL;
2400                 goto out_free;
2401         }
2402
2403         if (conv(&lval, tbl_data, 1, data)) {
2404                 err = -EINVAL;
2405                 goto out_free;
2406         }
2407
2408         if (!err && left)
2409                 left -= proc_skip_spaces(&p);
2410
2411 out_free:
2412         kfree(kbuf);
2413         if (err)
2414                 return -EINVAL;
2415
2416         return 0;
2417
2418         /* This is in keeping with old __do_proc_dointvec() */
2419 bail_early:
2420         *ppos += *lenp;
2421         return err;
2422 }
2423
2424 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2425                                size_t *lenp, loff_t *ppos,
2426                                int (*conv)(unsigned long *lvalp,
2427                                            unsigned int *valp,
2428                                            int write, void *data),
2429                                void *data)
2430 {
2431         unsigned long lval;
2432         int err = 0;
2433         size_t left;
2434
2435         left = *lenp;
2436
2437         if (conv(&lval, tbl_data, 0, data)) {
2438                 err = -EINVAL;
2439                 goto out;
2440         }
2441
2442         err = proc_put_long(&buffer, &left, lval, false);
2443         if (err || !left)
2444                 goto out;
2445
2446         err = proc_put_char(&buffer, &left, '\n');
2447
2448 out:
2449         *lenp -= left;
2450         *ppos += *lenp;
2451
2452         return err;
2453 }
2454
2455 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2456                                int write, void __user *buffer,
2457                                size_t *lenp, loff_t *ppos,
2458                                int (*conv)(unsigned long *lvalp,
2459                                            unsigned int *valp,
2460                                            int write, void *data),
2461                                void *data)
2462 {
2463         unsigned int *i, vleft;
2464
2465         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2466                 *lenp = 0;
2467                 return 0;
2468         }
2469
2470         i = (unsigned int *) tbl_data;
2471         vleft = table->maxlen / sizeof(*i);
2472
2473         /*
2474          * Arrays are not supported, keep this simple. *Do not* add
2475          * support for them.
2476          */
2477         if (vleft != 1) {
2478                 *lenp = 0;
2479                 return -EINVAL;
2480         }
2481
2482         if (!conv)
2483                 conv = do_proc_douintvec_conv;
2484
2485         if (write)
2486                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2487                                            conv, data);
2488         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2489 }
2490
2491 static int do_proc_douintvec(struct ctl_table *table, int write,
2492                              void __user *buffer, size_t *lenp, loff_t *ppos,
2493                              int (*conv)(unsigned long *lvalp,
2494                                          unsigned int *valp,
2495                                          int write, void *data),
2496                              void *data)
2497 {
2498         return __do_proc_douintvec(table->data, table, write,
2499                                    buffer, lenp, ppos, conv, data);
2500 }
2501
2502 /**
2503  * proc_dointvec - read a vector of integers
2504  * @table: the sysctl table
2505  * @write: %TRUE if this is a write to the sysctl file
2506  * @buffer: the user buffer
2507  * @lenp: the size of the user buffer
2508  * @ppos: file position
2509  *
2510  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2511  * values from/to the user buffer, treated as an ASCII string. 
2512  *
2513  * Returns 0 on success.
2514  */
2515 int proc_dointvec(struct ctl_table *table, int write,
2516                      void __user *buffer, size_t *lenp, loff_t *ppos)
2517 {
2518         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2519 }
2520
2521 /**
2522  * proc_douintvec - read a vector of unsigned integers
2523  * @table: the sysctl table
2524  * @write: %TRUE if this is a write to the sysctl file
2525  * @buffer: the user buffer
2526  * @lenp: the size of the user buffer
2527  * @ppos: file position
2528  *
2529  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2530  * values from/to the user buffer, treated as an ASCII string.
2531  *
2532  * Returns 0 on success.
2533  */
2534 int proc_douintvec(struct ctl_table *table, int write,
2535                      void __user *buffer, size_t *lenp, loff_t *ppos)
2536 {
2537         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2538                                  do_proc_douintvec_conv, NULL);
2539 }
2540
2541 /*
2542  * Taint values can only be increased
2543  * This means we can safely use a temporary.
2544  */
2545 static int proc_taint(struct ctl_table *table, int write,
2546                                void __user *buffer, size_t *lenp, loff_t *ppos)
2547 {
2548         struct ctl_table t;
2549         unsigned long tmptaint = get_taint();
2550         int err;
2551
2552         if (write && !capable(CAP_SYS_ADMIN))
2553                 return -EPERM;
2554
2555         t = *table;
2556         t.data = &tmptaint;
2557         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2558         if (err < 0)
2559                 return err;
2560
2561         if (write) {
2562                 /*
2563                  * Poor man's atomic or. Not worth adding a primitive
2564                  * to everyone's atomic.h for this
2565                  */
2566                 int i;
2567                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2568                         if ((tmptaint >> i) & 1)
2569                                 add_taint(i, LOCKDEP_STILL_OK);
2570                 }
2571         }
2572
2573         return err;
2574 }
2575
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() with an extra gate: writes are refused with
 * -EPERM unless the caller has CAP_SYS_ADMIN.  Reads are not restricted
 * here.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
				void __user *buffer, size_t *lenp, loff_t *ppos)
{
	bool permitted = !write || capable(CAP_SYS_ADMIN);

	if (!permitted)
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
2586
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback for proc_dointvec_minmax().
 *
 * Write (@write != 0): combine the magnitude *@lvalp and sign *@negp into
 * an int, check it against the optional bounds in @data (a NULL bound
 * pointer means "no limit"), and store it in *@valp.  A magnitude that does
 * not fit in an int is rejected rather than silently truncated; without
 * that check a wrapped value could slip through the range test.
 *
 * Read (@write == 0): split *@valp into magnitude *@lvalp and sign *@negp.
 *
 * Returns 0 on success, -EINVAL when the written value overflows an int or
 * falls outside [min, max]; *@valp is left untouched in that case.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		if (*negp) {
			/* -INT_MAX - 1 is the most negative int. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2624
2625 /**
2626  * proc_dointvec_minmax - read a vector of integers with min/max values
2627  * @table: the sysctl table
2628  * @write: %TRUE if this is a write to the sysctl file
2629  * @buffer: the user buffer
2630  * @lenp: the size of the user buffer
2631  * @ppos: file position
2632  *
2633  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2634  * values from/to the user buffer, treated as an ASCII string.
2635  *
2636  * This routine will ensure the values are within the range specified by
2637  * table->extra1 (min) and table->extra2 (max).
2638  *
2639  * Returns 0 on success or -EINVAL on write when the range check fails.
2640  */
2641 int proc_dointvec_minmax(struct ctl_table *table, int write,
2642                   void __user *buffer, size_t *lenp, loff_t *ppos)
2643 {
2644         struct do_proc_dointvec_minmax_conv_param param = {
2645                 .min = (int *) table->extra1,
2646                 .max = (int *) table->extra2,
2647         };
2648         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2649                                 do_proc_dointvec_minmax_conv, &param);
2650 }
2651
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback for proc_douintvec_minmax().
 *
 * Read: widen *@valp into *@lvalp.
 * Write: reject values above UINT_MAX with -EINVAL and values outside the
 * optional [min, max] bounds in @data with -ERANGE; otherwise store the
 * value in *@valp.  A NULL bound pointer disables that side of the check.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *param = data;
	unsigned int val;

	if (!write) {
		*lvalp = *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	val = *lvalp;
	if (param->min && val < *param->min)
		return -ERANGE;
	if (param->max && val > *param->max)
		return -ERANGE;

	*valp = val;
	return 0;
}
2690
2691 /**
2692  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2693  * @table: the sysctl table
2694  * @write: %TRUE if this is a write to the sysctl file
2695  * @buffer: the user buffer
2696  * @lenp: the size of the user buffer
2697  * @ppos: file position
2698  *
2699  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2700  * values from/to the user buffer, treated as an ASCII string. Negative
2701  * strings are not allowed.
2702  *
2703  * This routine will ensure the values are within the range specified by
2704  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2705  * check for UINT_MAX to avoid having to support wrap around uses from
2706  * userspace.
2707  *
2708  * Returns 0 on success or -ERANGE on write when the range check fails.
2709  */
2710 int proc_douintvec_minmax(struct ctl_table *table, int write,
2711                           void __user *buffer, size_t *lenp, loff_t *ppos)
2712 {
2713         struct do_proc_douintvec_minmax_conv_param param = {
2714                 .min = (unsigned int *) table->extra1,
2715                 .max = (unsigned int *) table->extra2,
2716         };
2717         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2718                                  do_proc_douintvec_minmax_conv, &param);
2719 }
2720
/*
 * Conversion callback for the pipe-max-size sysctl.
 *
 * Read: widen *@valp into *@lvalp.
 * Write: pass the requested size through round_pipe_size() and store the
 * result; a result of 0 is treated as invalid (-EINVAL).
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	unsigned int rounded;

	if (!write) {
		*lvalp = *valp;
		return 0;
	}

	rounded = round_pipe_size(*lvalp);
	if (!rounded)
		return -EINVAL;

	*valp = rounded;
	return 0;
}
2740
2741 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2742                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2743 {
2744         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2745                                  do_proc_dopipe_max_size_conv, NULL);
2746 }
2747
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT but core_pattern is neither an
 * absolute path nor a pipe handler.  Compiles to nothing without
 * CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;
	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2761
2762 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2763                 void __user *buffer, size_t *lenp, loff_t *ppos)
2764 {
2765         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2766         if (!error)
2767                 validate_coredump_safety();
2768         return error;
2769 }
2770
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, on success, by a re-check of the core dump
 * configuration.
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int error;

	error = proc_dostring(table, write, buffer, lenp, ppos);
	if (error)
		return error;

	validate_coredump_safety();
	return 0;
}
#endif
2781
2782 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2783                                      void __user *buffer,
2784                                      size_t *lenp, loff_t *ppos,
2785                                      unsigned long convmul,
2786                                      unsigned long convdiv)
2787 {
2788         unsigned long *i, *min, *max;
2789         int vleft, first = 1, err = 0;
2790         size_t left;
2791         char *kbuf = NULL, *p;
2792
2793         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2794                 *lenp = 0;
2795                 return 0;
2796         }
2797
2798         i = (unsigned long *) data;
2799         min = (unsigned long *) table->extra1;
2800         max = (unsigned long *) table->extra2;
2801         vleft = table->maxlen / sizeof(unsigned long);
2802         left = *lenp;
2803
2804         if (write) {
2805                 if (proc_first_pos_non_zero_ignore(ppos, table))
2806                         goto out;
2807
2808                 if (left > PAGE_SIZE - 1)
2809                         left = PAGE_SIZE - 1;
2810                 p = kbuf = memdup_user_nul(buffer, left);
2811                 if (IS_ERR(kbuf))
2812                         return PTR_ERR(kbuf);
2813         }
2814
2815         for (; left && vleft--; i++, first = 0) {
2816                 unsigned long val;
2817
2818                 if (write) {
2819                         bool neg;
2820
2821                         left -= proc_skip_spaces(&p);
2822                         if (!left)
2823                                 break;
2824
2825                         err = proc_get_long(&p, &left, &val, &neg,
2826                                              proc_wspace_sep,
2827                                              sizeof(proc_wspace_sep), NULL);
2828                         if (err)
2829                                 break;
2830                         if (neg)
2831                                 continue;
2832                         val = convmul * val / convdiv;
2833                         if ((min && val < *min) || (max && val > *max))
2834                                 continue;
2835                         *i = val;
2836                 } else {
2837                         val = convdiv * (*i) / convmul;
2838                         if (!first) {
2839                                 err = proc_put_char(&buffer, &left, '\t');
2840                                 if (err)
2841                                         break;
2842                         }
2843                         err = proc_put_long(&buffer, &left, val, false);
2844                         if (err)
2845                                 break;
2846                 }
2847         }
2848
2849         if (!write && !first && left && !err)
2850                 err = proc_put_char(&buffer, &left, '\n');
2851         if (write && !err)
2852                 left -= proc_skip_spaces(&p);
2853         if (write) {
2854                 kfree(kbuf);
2855                 if (first)
2856                         return err ? : -EINVAL;
2857         }
2858         *lenp -= left;
2859 out:
2860         *ppos += *lenp;
2861         return err;
2862 }
2863
2864 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2865                                      void __user *buffer,
2866                                      size_t *lenp, loff_t *ppos,
2867                                      unsigned long convmul,
2868                                      unsigned long convdiv)
2869 {
2870         return __do_proc_doulongvec_minmax(table->data, table, write,
2871                         buffer, lenp, ppos, convmul, convdiv);
2872 }
2873
2874 /**
2875  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2876  * @table: the sysctl table
2877  * @write: %TRUE if this is a write to the sysctl file
2878  * @buffer: the user buffer
2879  * @lenp: the size of the user buffer
2880  * @ppos: file position
2881  *
2882  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2883  * values from/to the user buffer, treated as an ASCII string.
2884  *
2885  * This routine will ensure the values are within the range specified by
2886  * table->extra1 (min) and table->extra2 (max).
2887  *
2888  * Returns 0 on success.
2889  */
2890 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2891                            void __user *buffer, size_t *lenp, loff_t *ppos)
2892 {
2893     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2894 }
2895
2896 /**
2897  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2898  * @table: the sysctl table
2899  * @write: %TRUE if this is a write to the sysctl file
2900  * @buffer: the user buffer
2901  * @lenp: the size of the user buffer
2902  * @ppos: file position
2903  *
2904  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2905  * values from/to the user buffer, treated as an ASCII string. The values
2906  * are treated as milliseconds, and converted to jiffies when they are stored.
2907  *
2908  * This routine will ensure the values are within the range specified by
2909  * table->extra1 (min) and table->extra2 (max).
2910  *
2911  * Returns 0 on success.
2912  */
2913 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2914                                       void __user *buffer,
2915                                       size_t *lenp, loff_t *ppos)
2916 {
2917     return do_proc_doulongvec_minmax(table, write, buffer,
2918                                      lenp, ppos, HZ, 1000l);
2919 }
2920
2921
2922 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2923                                          int *valp,
2924                                          int write, void *data)
2925 {
2926         if (write) {
2927                 if (*lvalp > INT_MAX / HZ)
2928                         return 1;
2929                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2930         } else {
2931                 int val = *valp;
2932                 unsigned long lval;
2933                 if (val < 0) {
2934                         *negp = true;
2935                         lval = -(unsigned long)val;
2936                 } else {
2937                         *negp = false;
2938                         lval = (unsigned long)val;
2939                 }
2940                 *lvalp = lval / HZ;
2941         }
2942         return 0;
2943 }
2944
2945 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2946                                                 int *valp,
2947                                                 int write, void *data)
2948 {
2949         if (write) {
2950                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2951                         return 1;
2952                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2953         } else {
2954                 int val = *valp;
2955                 unsigned long lval;
2956                 if (val < 0) {
2957                         *negp = true;
2958                         lval = -(unsigned long)val;
2959                 } else {
2960                         *negp = false;
2961                         lval = (unsigned long)val;
2962                 }
2963                 *lvalp = jiffies_to_clock_t(lval);
2964         }
2965         return 0;
2966 }
2967
/*
 * Conversion callback: milliseconds in user space <-> jiffies in the
 * kernel.
 *
 * Write: convert the signed value with msecs_to_jiffies() and reject
 * (return 1) a result above INT_MAX; otherwise store it.
 * Read: report the sign in *@negp and jiffies_to_msecs() of the magnitude
 * in *@lvalp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int val;

	if (write) {
		unsigned long jif;

		jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
		return 0;
	}

	val = *valp;
	if (val < 0) {
		*negp = true;
		magnitude = -(unsigned long)val;
	} else {
		*negp = false;
		magnitude = (unsigned long)val;
	}
	*lvalp = jiffies_to_msecs(magnitude);

	return 0;
}
2992
2993 /**
2994  * proc_dointvec_jiffies - read a vector of integers as seconds
2995  * @table: the sysctl table
2996  * @write: %TRUE if this is a write to the sysctl file
2997  * @buffer: the user buffer
2998  * @lenp: the size of the user buffer
2999  * @ppos: file position
3000  *
3001  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3002  * values from/to the user buffer, treated as an ASCII string. 
3003  * The values read are assumed to be in seconds, and are converted into
3004  * jiffies.
3005  *
3006  * Returns 0 on success.
3007  */
3008 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3009                           void __user *buffer, size_t *lenp, loff_t *ppos)
3010 {
3011     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3012                             do_proc_dointvec_jiffies_conv,NULL);
3013 }
3014
3015 /**
3016  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
3017  * @table: the sysctl table
3018  * @write: %TRUE if this is a write to the sysctl file
3019  * @buffer: the user buffer
3020  * @lenp: the size of the user buffer
3021  * @ppos: pointer to the file position
3022  *
3023  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3024  * values from/to the user buffer, treated as an ASCII string. 
3025  * The values read are assumed to be in 1/USER_HZ seconds, and 
3026  * are converted into jiffies.
3027  *
3028  * Returns 0 on success.
3029  */
3030 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3031                                  void __user *buffer, size_t *lenp, loff_t *ppos)
3032 {
3033     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3034                             do_proc_dointvec_userhz_jiffies_conv,NULL);
3035 }
3036
3037 /**
3038  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3039  * @table: the sysctl table
3040  * @write: %TRUE if this is a write to the sysctl file
3041  * @buffer: the user buffer
3042  * @lenp: the size of the user buffer
3043  * @ppos: file position
3044  * @ppos: the current position in the file
3045  *
3046  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3047  * values from/to the user buffer, treated as an ASCII string. 
3048  * The values read are assumed to be in 1/1000 seconds, and 
3049  * are converted into jiffies.
3050  *
3051  * Returns 0 on success.
3052  */
3053 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3054                              void __user *buffer, size_t *lenp, loff_t *ppos)
3055 {
3056         return do_proc_dointvec(table, write, buffer, lenp, ppos,
3057                                 do_proc_dointvec_ms_jiffies_conv, NULL);
3058 }
3059
3060 static int proc_do_cad_pid(struct ctl_table *table, int write,
3061                            void __user *buffer, size_t *lenp, loff_t *ppos)
3062 {
3063         struct pid *new_pid;
3064         pid_t tmp;
3065         int r;
3066
3067         tmp = pid_vnr(cad_pid);
3068
3069         r = __do_proc_dointvec(&tmp, table, write, buffer,
3070                                lenp, ppos, NULL, NULL);
3071         if (r || !write)
3072                 return r;
3073
3074         new_pid = find_get_pid(tmp);
3075         if (!new_pid)
3076                 return -ESRCH;
3077
3078         put_pid(xchg(&cad_pid, new_pid));
3079         return 0;
3080 }
3081
3082 /**
3083  * proc_do_large_bitmap - read/write from/to a large bitmap
3084  * @table: the sysctl table
3085  * @write: %TRUE if this is a write to the sysctl file
3086  * @buffer: the user buffer
3087  * @lenp: the size of the user buffer
3088  * @ppos: file position
3089  *
3090  * The bitmap is stored at table->data and the bitmap length (in bits)
3091  * in table->maxlen.
3092  *
3093  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3094  * large bitmaps may be represented in a compact manner. Writing into
3095  * the file will clear the bitmap then update it with the given input.
3096  *
3097  * Returns 0 on success.
3098  */
3099 int proc_do_large_bitmap(struct ctl_table *table, int write,
3100                          void __user *buffer, size_t *lenp, loff_t *ppos)
3101 {
3102         int err = 0;
3103         bool first = 1;
3104         size_t left = *lenp;
3105         unsigned long bitmap_len = table->maxlen;
3106         unsigned long *bitmap = *(unsigned long **) table->data;
3107         unsigned long *tmp_bitmap = NULL;
3108         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3109
3110         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3111                 *lenp = 0;
3112                 return 0;
3113         }
3114
3115         if (write) {
3116                 char *kbuf, *p;
3117
3118                 if (left > PAGE_SIZE - 1)
3119                         left = PAGE_SIZE - 1;
3120
3121                 p = kbuf = memdup_user_nul(buffer, left);
3122                 if (IS_ERR(kbuf))
3123                         return PTR_ERR(kbuf);
3124
3125                 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3126                                      sizeof(unsigned long),
3127                                      GFP_KERNEL);
3128                 if (!tmp_bitmap) {
3129                         kfree(kbuf);
3130                         return -ENOMEM;
3131                 }
3132                 proc_skip_char(&p, &left, '\n');
3133                 while (!err && left) {
3134                         unsigned long val_a, val_b;
3135                         bool neg;
3136
3137                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3138                                              sizeof(tr_a), &c);
3139                         if (err)
3140                                 break;
3141                         if (val_a >= bitmap_len || neg) {
3142                                 err = -EINVAL;
3143                                 break;
3144                         }
3145
3146                         val_b = val_a;
3147                         if (left) {
3148                                 p++;
3149                                 left--;
3150                         }
3151
3152                         if (c == '-') {
3153                                 err = proc_get_long(&p, &left, &val_b,
3154                                                      &neg, tr_b, sizeof(tr_b),
3155                                                      &c);
3156                                 if (err)
3157                                         break;
3158                                 if (val_b >= bitmap_len || neg ||
3159                                     val_a > val_b) {
3160                                         err = -EINVAL;
3161                                         break;
3162                                 }
3163                                 if (left) {
3164                                         p++;
3165                                         left--;
3166                                 }
3167                         }
3168
3169                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3170                         first = 0;
3171                         proc_skip_char(&p, &left, '\n');
3172                 }
3173                 kfree(kbuf);
3174         } else {
3175                 unsigned long bit_a, bit_b = 0;
3176
3177                 while (left) {
3178                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3179                         if (bit_a >= bitmap_len)
3180                                 break;
3181                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3182                                                    bit_a + 1) - 1;
3183
3184                         if (!first) {
3185                                 err = proc_put_char(&buffer, &left, ',');
3186                                 if (err)
3187                                         break;
3188                         }
3189                         err = proc_put_long(&buffer, &left, bit_a, false);
3190                         if (err)
3191                                 break;
3192                         if (bit_a != bit_b) {
3193                                 err = proc_put_char(&buffer, &left, '-');
3194                                 if (err)
3195                                         break;
3196                                 err = proc_put_long(&buffer, &left, bit_b, false);
3197                                 if (err)
3198                                         break;
3199                         }
3200
3201                         first = 0; bit_b++;
3202                 }
3203                 if (!err)
3204                         err = proc_put_char(&buffer, &left, '\n');
3205         }
3206
3207         if (!err) {
3208                 if (write) {
3209                         if (*ppos)
3210                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3211                         else
3212                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3213                 }
3214                 *lenp -= left;
3215                 *ppos += *lenp;
3216         }
3217
3218         kfree(tmp_bitmap);
3219         return err;
3220 }
3221
3222 #else /* CONFIG_PROC_SYSCTL */
3223
3224 int proc_dostring(struct ctl_table *table, int write,
3225                   void __user *buffer, size_t *lenp, loff_t *ppos)
3226 {
3227         return -ENOSYS;
3228 }
3229
3230 int proc_dointvec(struct ctl_table *table, int write,
3231                   void __user *buffer, size_t *lenp, loff_t *ppos)
3232 {
3233         return -ENOSYS;
3234 }
3235
3236 int proc_douintvec(struct ctl_table *table, int write,
3237                   void __user *buffer, size_t *lenp, loff_t *ppos)
3238 {
3239         return -ENOSYS;
3240 }
3241
3242 int proc_dointvec_minmax(struct ctl_table *table, int write,
3243                     void __user *buffer, size_t *lenp, loff_t *ppos)
3244 {
3245         return -ENOSYS;
3246 }
3247
3248 int proc_douintvec_minmax(struct ctl_table *table, int write,
3249                           void __user *buffer, size_t *lenp, loff_t *ppos)
3250 {
3251         return -ENOSYS;
3252 }
3253
3254 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3255                     void __user *buffer, size_t *lenp, loff_t *ppos)
3256 {
3257         return -ENOSYS;
3258 }
3259
3260 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3261                     void __user *buffer, size_t *lenp, loff_t *ppos)
3262 {
3263         return -ENOSYS;
3264 }
3265
3266 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3267                              void __user *buffer, size_t *lenp, loff_t *ppos)
3268 {
3269         return -ENOSYS;
3270 }
3271
3272 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3273                     void __user *buffer, size_t *lenp, loff_t *ppos)
3274 {
3275         return -ENOSYS;
3276 }
3277
3278 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3279                                       void __user *buffer,
3280                                       size_t *lenp, loff_t *ppos)
3281 {
3282     return -ENOSYS;
3283 }
3284
3285
3286 #endif /* CONFIG_PROC_SYSCTL */
3287
3288 #ifdef CONFIG_BPF_SYSCALL
3289 static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
3290                                           void __user *buffer, size_t *lenp,
3291                                           loff_t *ppos)
3292 {
3293         int ret, bpf_stats = *(int *)table->data;
3294         struct ctl_table tmp = *table;
3295
3296         if (write && !capable(CAP_SYS_ADMIN))
3297                 return -EPERM;
3298
3299         tmp.data = &bpf_stats;
3300         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3301         if (write && !ret) {
3302                 *(int *)table->data = bpf_stats;
3303                 if (bpf_stats)
3304                         static_branch_enable(&bpf_stats_enabled_key);
3305                 else
3306                         static_branch_disable(&bpf_stats_enabled_key);
3307         }
3308         return ret;
3309 }
3310 #endif
/*
 * The exports for the proc_do* handlers are collected here instead of
 * following each definition: doing it per definition would mean writing
 * every export twice, so an exception to the usual placement is made.
 * Listed alphabetically; proc_douintvec_minmax is the only GPL-only export.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);