Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
/*
 * Static devconf instance.  Redirect handling (accept/send/secure) and
 * shared-media are switched on, and the IGMPv2/IGMPv3 unsolicited report
 * intervals are preset to 10000 ms and 1000 ms.  Slots are indexed by
 * IPV4_DEVCONF_* - 1; every slot not listed stays zero.
 * NOTE(review): the users of this object are outside this excerpt --
 * presumably it backs the "all" configuration; confirm.
 */
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
/*
 * Static devconf instance holding default values.  It carries the same
 * presets as ipv4_devconf and additionally enables ACCEPT_SOURCE_ROUTE.
 * Slots are indexed by IPV4_DEVCONF_* - 1; unlisted slots stay zero.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device -- the wiring is
 * outside this excerpt; confirm.
 */
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
91
/*
 * Read devconf attribute @attr from the per-namespace default configuration
 * of @net.  @net is parenthesised so that any pointer-valued expression can
 * be passed safely (the dereference must bind to the whole argument).
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
/*
 * Netlink attribute policy for IPv4 address messages.  It is handed to
 * nlmsg_parse() by inet_rtm_deladdr() and rtm_to_ifaddr() to validate the
 * IFA_* attributes: addresses, flags and route priority are 32-bit values,
 * the label is a string of at most IFNAMSIZ - 1 bytes, and the cache info
 * attribute is bounded by sizeof(struct ifa_cacheinfo).
 */
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102         [IFA_RT_PRIORITY]       = { .type = NLA_U32 },
103         [IFA_TARGET_NETNSID]    = { .type = NLA_S32 },
104 };
105
/*
 * Parameter bundle.
 * NOTE(review): no user of this struct is visible in this excerpt; judging
 * by the field names it presumably carries the netlink addressing details
 * (port id, sequence, event, flags) plus namespace id and interface index
 * for the code that builds address messages -- confirm against the callers.
 */
106 struct inet_fill_args {
107         u32 portid;
108         u32 seq;
109         int event;
110         unsigned int flags;
111         int netnsid;
112         int ifindex;
113 };
114
/*
 * Global hash of configured IPv4 addresses: 1 << 8 = 256 buckets.
 * Entries are struct in_ifaddr linked through ->hash; the bucket index is
 * computed by inet_addr_hash() from the namespace and the local address.
 */
115 #define IN4_ADDR_HSIZE_SHIFT    8
116 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
117
118 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
119
120 static u32 inet_addr_hash(const struct net *net, __be32 addr)
121 {
122         u32 val = (__force u32) addr ^ net_hash_mix(net);
123
124         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
125 }
126
127 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
128 {
129         u32 hash = inet_addr_hash(net, ifa->ifa_local);
130
131         ASSERT_RTNL();
132         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
133 }
134
135 static void inet_hash_remove(struct in_ifaddr *ifa)
136 {
137         ASSERT_RTNL();
138         hlist_del_init_rcu(&ifa->hash);
139 }
140
141 /**
142  * __ip_dev_find - find the first device with a given source address.
143  * @net: the net namespace
144  * @addr: the source address
145  * @devref: if true, take a reference on the found device
146  *
147  * If a caller uses devref=false, it should be protected by RCU, or RTNL
148  */
149 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
150 {
151         struct net_device *result = NULL;
152         struct in_ifaddr *ifa;
153
154         rcu_read_lock();
155         ifa = inet_lookup_ifaddr_rcu(net, addr);
156         if (!ifa) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         } else {
170                 result = ifa->ifa_dev->dev;
171         }
172         if (result && devref)
173                 dev_hold(result);
174         rcu_read_unlock();
175         return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178
179 /* called under RCU lock */
180 struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
181 {
182         u32 hash = inet_addr_hash(net, addr);
183         struct in_ifaddr *ifa;
184
185         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
186                 if (ifa->ifa_local == addr &&
187                     net_eq(dev_net(ifa->ifa_dev->dev), net))
188                         return ifa;
189
190         return NULL;
191 }
192
/* Forward declaration: netlink announcement of an address event. */
193 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
194
/*
 * inetaddr_chain is called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr when an address is added, promoted or removed.
 * inetaddr_validator_chain is called with a struct in_validator_info
 * before a new address is committed, so listeners can veto it.
 */
195 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
196 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
197 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
198                          int destroy);
/*
 * Per-device sysctl hooks.  With CONFIG_SYSCTL they are only declared here
 * (the definitions are outside this excerpt); without it, registration is
 * a no-op that reports success and unregistration does nothing.
 */
199 #ifdef CONFIG_SYSCTL
200 static int devinet_sysctl_register(struct in_device *idev);
201 static void devinet_sysctl_unregister(struct in_device *idev);
202 #else
203 static int devinet_sysctl_register(struct in_device *idev)
204 {
205         return 0;
206 }
207 static void devinet_sysctl_unregister(struct in_device *idev)
208 {
209 }
210 #endif
211
212 /* Locks all the inet devices. */
213
214 static struct in_ifaddr *inet_alloc_ifa(void)
215 {
216         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
217 }
218
219 static void inet_rcu_free_ifa(struct rcu_head *head)
220 {
221         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
222         if (ifa->ifa_dev)
223                 in_dev_put(ifa->ifa_dev);
224         kfree(ifa);
225 }
226
227 static void inet_free_ifa(struct in_ifaddr *ifa)
228 {
229         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
230 }
231
232 void in_dev_finish_destroy(struct in_device *idev)
233 {
234         struct net_device *dev = idev->dev;
235
236         WARN_ON(idev->ifa_list);
237         WARN_ON(idev->mc_list);
238         kfree(rcu_dereference_protected(idev->mc_hash, 1));
239 #ifdef NET_REFCNT_DEBUG
240         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
241 #endif
242         dev_put(dev);
243         if (!idev->dead)
244                 pr_err("Freeing alive in_device %p\n", idev);
245         else
246                 kfree(idev);
247 }
248 EXPORT_SYMBOL(in_dev_finish_destroy);
249
250 static struct in_device *inetdev_init(struct net_device *dev)
251 {
252         struct in_device *in_dev;
253         int err = -ENOMEM;
254
255         ASSERT_RTNL();
256
257         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
258         if (!in_dev)
259                 goto out;
260         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
261                         sizeof(in_dev->cnf));
262         in_dev->cnf.sysctl = NULL;
263         in_dev->dev = dev;
264         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
265         if (!in_dev->arp_parms)
266                 goto out_kfree;
267         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
268                 dev_disable_lro(dev);
269         /* Reference in_dev->dev */
270         dev_hold(dev);
271         /* Account for reference dev->ip_ptr (below) */
272         refcount_set(&in_dev->refcnt, 1);
273
274         err = devinet_sysctl_register(in_dev);
275         if (err) {
276                 in_dev->dead = 1;
277                 in_dev_put(in_dev);
278                 in_dev = NULL;
279                 goto out;
280         }
281         ip_mc_init_dev(in_dev);
282         if (dev->flags & IFF_UP)
283                 ip_mc_up(in_dev);
284
285         /* we can receive as soon as ip_ptr is set -- do this last */
286         rcu_assign_pointer(dev->ip_ptr, in_dev);
287 out:
288         return in_dev ?: ERR_PTR(err);
289 out_kfree:
290         kfree(in_dev);
291         in_dev = NULL;
292         goto out;
293 }
294
295 static void in_dev_rcu_put(struct rcu_head *head)
296 {
297         struct in_device *idev = container_of(head, struct in_device, rcu_head);
298         in_dev_put(idev);
299 }
300
301 static void inetdev_destroy(struct in_device *in_dev)
302 {
303         struct in_ifaddr *ifa;
304         struct net_device *dev;
305
306         ASSERT_RTNL();
307
308         dev = in_dev->dev;
309
310         in_dev->dead = 1;
311
312         ip_mc_destroy_dev(in_dev);
313
314         while ((ifa = in_dev->ifa_list) != NULL) {
315                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
316                 inet_free_ifa(ifa);
317         }
318
319         RCU_INIT_POINTER(dev->ip_ptr, NULL);
320
321         devinet_sysctl_unregister(in_dev);
322         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
323         arp_ifdown(dev);
324
325         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
326 }
327
328 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
329 {
330         rcu_read_lock();
331         for_primary_ifa(in_dev) {
332                 if (inet_ifa_match(a, ifa)) {
333                         if (!b || inet_ifa_match(b, ifa)) {
334                                 rcu_read_unlock();
335                                 return 1;
336                         }
337                 }
338         } endfor_ifa(in_dev);
339         rcu_read_unlock();
340         return 0;
341 }
342
/*
 * Remove the address *@ifap from @in_dev.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (list head or previous ->ifa_next) pointing at the address
 * @destroy: when non-zero the removed address is also freed
 * @nlh, @portid: passed through to rtmsg_ifa() for the netlink notifications
 *
 * When a primary address is removed from a live device, the secondaries of
 * the same subnet that follow it are either deleted too or, if secondary
 * promotion is enabled, the first of them is promoted to primary and the
 * routes of the remaining ones are re-added.  For a dead device this step
 * is skipped entirely.  RTNL must be held.
 */
343 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
344                          int destroy, struct nlmsghdr *nlh, u32 portid)
345 {
346         struct in_ifaddr *promote = NULL;
347         struct in_ifaddr *ifa, *ifa1 = *ifap;
348         struct in_ifaddr *last_prim = in_dev->ifa_list;
349         struct in_ifaddr *prev_prom = NULL;
350         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
351
352         ASSERT_RTNL();
353
354         if (in_dev->dead)
355                 goto no_promotions;
356
357         /* 1. Deleting primary ifaddr forces deletion all secondaries
358          * unless alias promotion is set
359          **/
360
361         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
362                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
363
364                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary a promoted address can be
                         * re-linked after.
                         */
365                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
366                             ifa1->ifa_scope <= ifa->ifa_scope)
367                                 last_prim = ifa;
368
                        /* Skip anything that is not a secondary of ifa1's
                         * subnet.
                         */
369                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
370                             ifa1->ifa_mask != ifa->ifa_mask ||
371                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
372                                 ifap1 = &ifa->ifa_next;
373                                 prev_prom = ifa;
374                                 continue;
375                         }
376
377                         if (!do_promote) {
378                                 inet_hash_remove(ifa);
379                                 *ifap1 = ifa->ifa_next;
380
381                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
382                                 blocking_notifier_call_chain(&inetaddr_chain,
383                                                 NETDEV_DOWN, ifa);
384                                 inet_free_ifa(ifa);
385                         } else {
                                /* First matching secondary becomes the new
                                 * primary.
                                 */
386                                 promote = ifa;
387                                 break;
388                         }
389                 }
390         }
391
392         /* On promotion all secondaries from subnet are changing
393          * the primary IP, we must remove all their routes silently
394          * and later to add them back with new prefsrc. Do this
395          * while all addresses are on the device list.
396          */
397         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
398                 if (ifa1->ifa_mask == ifa->ifa_mask &&
399                     inet_ifa_match(ifa1->ifa_address, ifa))
400                         fib_del_ifaddr(ifa, ifa1);
401         }
402
403 no_promotions:
404         /* 2. Unlink it */
405
406         *ifap = ifa1->ifa_next;
407         inet_hash_remove(ifa1);
408
409         /* 3. Announce address deletion */
410
411         /* Send message first, then call notifier.
412            At first sight, FIB update triggered by notifier
413            will refer to already deleted ifaddr, that could confuse
414            netlink listeners. It is not true: look, gated sees
415            that route deleted and if it still thinks that ifaddr
416            is valid, it will try to restore deleted routes... Grr.
417            So that, this order is correct.
418          */
419         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
420         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
421
422         if (promote) {
423                 struct in_ifaddr *next_sec = promote->ifa_next;
424
                /* Move the promoted address right behind last_prim; when
                 * prev_prom is NULL it is left where it is.
                 */
425                 if (prev_prom) {
426                         prev_prom->ifa_next = promote->ifa_next;
427                         promote->ifa_next = last_prim->ifa_next;
428                         last_prim->ifa_next = promote;
429                 }
430
431                 promote->ifa_flags &= ~IFA_F_SECONDARY;
432                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
433                 blocking_notifier_call_chain(&inetaddr_chain,
434                                 NETDEV_UP, promote);
                /* Re-add the routes of the remaining same-subnet addresses. */
435                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
436                         if (ifa1->ifa_mask != ifa->ifa_mask ||
437                             !inet_ifa_match(ifa1->ifa_address, ifa))
438                                         continue;
439                         fib_add_ifaddr(ifa);
440                 }
441
442         }
443         if (destroy)
444                 inet_free_ifa(ifa1);
445 }
446
447 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
448                          int destroy)
449 {
450         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
451 }
452
/*
 * Delayed work running check_lifetime().  It is kicked immediately by
 * __inet_insert_ifa() and re-queued by check_lifetime() itself.
 */
453 static void check_lifetime(struct work_struct *work);
454
455 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
456
/*
 * Insert @ifa into the address list of its device (ifa->ifa_dev).
 *
 * @ifa:    address to insert; it is freed here on every path that does not
 *          insert it
 * @nlh, @portid: passed through to rtmsg_ifa() for the RTM_NEWADDR message
 * @extack: handed to the validator chain via struct in_validator_info
 *
 * An address that shares subnet (mask and prefix) with an existing entry
 * becomes a secondary; otherwise it is a primary and is linked at the
 * position recorded in last_primary.  RTNL must be held.
 *
 * Returns 0 on success and also when ifa_local is zero (nothing inserted),
 * -EEXIST if the same local address already exists in that subnet,
 * -EINVAL if the scope differs from the existing subnet entry, or the
 * error produced by a validator.
 */
457 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
458                              u32 portid, struct netlink_ext_ack *extack)
459 {
460         struct in_device *in_dev = ifa->ifa_dev;
461         struct in_ifaddr *ifa1, **ifap, **last_primary;
462         struct in_validator_info ivi;
463         int ret;
464
465         ASSERT_RTNL();
466
467         if (!ifa->ifa_local) {
468                 inet_free_ifa(ifa);
469                 return 0;
470         }
471
472         ifa->ifa_flags &= ~IFA_F_SECONDARY;
473         last_primary = &in_dev->ifa_list;
474
        /* After this loop ifap points at the tail link of the list. */
475         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
476              ifap = &ifa1->ifa_next) {
477                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
478                     ifa->ifa_scope <= ifa1->ifa_scope)
479                         last_primary = &ifa1->ifa_next;
480                 if (ifa1->ifa_mask == ifa->ifa_mask &&
481                     inet_ifa_match(ifa1->ifa_address, ifa)) {
482                         if (ifa1->ifa_local == ifa->ifa_local) {
483                                 inet_free_ifa(ifa);
484                                 return -EEXIST;
485                         }
486                         if (ifa1->ifa_scope != ifa->ifa_scope) {
487                                 inet_free_ifa(ifa);
488                                 return -EINVAL;
489                         }
490                         ifa->ifa_flags |= IFA_F_SECONDARY;
491                 }
492         }
493
494         /* Allow any devices that wish to register ifaddr validators to weigh
495          * in now, before changes are committed.  The rtnl lock is serializing
496          * access here, so the state should not change between a validator call
497          * and a final notify on commit.  This isn't invoked on promotion under
498          * the assumption that validators are checking the address itself, and
499          * not the flags.
500          */
501         ivi.ivi_addr = ifa->ifa_address;
502         ivi.ivi_dev = ifa->ifa_dev;
503         ivi.extack = extack;
504         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
505                                            NETDEV_UP, &ivi);
506         ret = notifier_to_errno(ret);
507         if (ret) {
508                 inet_free_ifa(ifa);
509                 return ret;
510         }
511
        /* Primaries go to the recorded primary slot; secondaries are
         * appended at the tail (ifap as left by the loop above).
         */
512         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
513                 prandom_seed((__force u32) ifa->ifa_local);
514                 ifap = last_primary;
515         }
516
517         ifa->ifa_next = *ifap;
518         *ifap = ifa;
519
520         inet_hash_insert(dev_net(in_dev->dev), ifa);
521
        /* Restart the lifetime scan so it runs right away. */
522         cancel_delayed_work(&check_lifetime_work);
523         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
524
525         /* Send message first, then call notifier.
526            Notifier will trigger FIB update, so that
527            listeners of netlink will know about new ifaddr */
528         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
529         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
530
531         return 0;
532 }
533
534 static int inet_insert_ifa(struct in_ifaddr *ifa)
535 {
536         return __inet_insert_ifa(ifa, NULL, 0, NULL);
537 }
538
539 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
540 {
541         struct in_device *in_dev = __in_dev_get_rtnl(dev);
542
543         ASSERT_RTNL();
544
545         if (!in_dev) {
546                 inet_free_ifa(ifa);
547                 return -ENOBUFS;
548         }
549         ipv4_devconf_setall(in_dev);
550         neigh_parms_data_state_setall(in_dev->arp_parms);
551         if (ifa->ifa_dev != in_dev) {
552                 WARN_ON(ifa->ifa_dev);
553                 in_dev_hold(in_dev);
554                 ifa->ifa_dev = in_dev;
555         }
556         if (ipv4_is_loopback(ifa->ifa_local))
557                 ifa->ifa_scope = RT_SCOPE_HOST;
558         return inet_insert_ifa(ifa);
559 }
560
561 /* Caller must hold RCU or RTNL :
562  * We dont take a reference on found in_device
563  */
564 struct in_device *inetdev_by_index(struct net *net, int ifindex)
565 {
566         struct net_device *dev;
567         struct in_device *in_dev = NULL;
568
569         rcu_read_lock();
570         dev = dev_get_by_index_rcu(net, ifindex);
571         if (dev)
572                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
573         rcu_read_unlock();
574         return in_dev;
575 }
576 EXPORT_SYMBOL(inetdev_by_index);
577
578 /* Called only from RTNL semaphored context. No locks. */
579
580 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
581                                     __be32 mask)
582 {
583         ASSERT_RTNL();
584
585         for_primary_ifa(in_dev) {
586                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
587                         return ifa;
588         } endfor_ifa(in_dev);
589         return NULL;
590 }
591
592 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
593 {
594         struct ip_mreqn mreq = {
595                 .imr_multiaddr.s_addr = ifa->ifa_address,
596                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
597         };
598         int ret;
599
600         ASSERT_RTNL();
601
602         lock_sock(sk);
603         if (join)
604                 ret = ip_mc_join_group(sk, &mreq);
605         else
606                 ret = ip_mc_leave_group(sk, &mreq);
607         release_sock(sk);
608
609         return ret;
610 }
611
612 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
613                             struct netlink_ext_ack *extack)
614 {
615         struct net *net = sock_net(skb->sk);
616         struct nlattr *tb[IFA_MAX+1];
617         struct in_device *in_dev;
618         struct ifaddrmsg *ifm;
619         struct in_ifaddr *ifa, **ifap;
620         int err = -EINVAL;
621
622         ASSERT_RTNL();
623
624         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
625                           extack);
626         if (err < 0)
627                 goto errout;
628
629         ifm = nlmsg_data(nlh);
630         in_dev = inetdev_by_index(net, ifm->ifa_index);
631         if (!in_dev) {
632                 err = -ENODEV;
633                 goto errout;
634         }
635
636         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
637              ifap = &ifa->ifa_next) {
638                 if (tb[IFA_LOCAL] &&
639                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
640                         continue;
641
642                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
643                         continue;
644
645                 if (tb[IFA_ADDRESS] &&
646                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
647                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
648                         continue;
649
650                 if (ipv4_is_multicast(ifa->ifa_address))
651                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
652                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
653                 return 0;
654         }
655
656         err = -EADDRNOTAVAIL;
657 errout:
658         return err;
659 }
660
/* Lifetime value meaning "never expires". */
661 #define INFINITY_LIFE_TIME      0xFFFFFFFF
662
/*
 * Delayed-work handler enforcing address lifetimes.
 *
 * Every bucket of inet_addr_lst is scanned twice at most: a first pass
 * under rcu_read_lock() decides whether any non-permanent address has
 * expired or must become deprecated and tracks the earliest upcoming
 * deadline in 'next'; only when a change is needed is the bucket walked
 * again under rtnl_lock() to delete expired addresses and to flag and
 * announce deprecated ones.  Finally the work re-queues itself.
 */
663 static void check_lifetime(struct work_struct *work)
664 {
665         unsigned long now, next, next_sec, next_sched;
666         struct in_ifaddr *ifa;
667         struct hlist_node *n;
668         int i;
669
670         now = jiffies;
671         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
672
673         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
674                 bool change_needed = false;
675
                /* Pass 1: read-only scan under RCU. */
676                 rcu_read_lock();
677                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
678                         unsigned long age;
679
680                         if (ifa->ifa_flags & IFA_F_PERMANENT)
681                                 continue;
682
683                         /* We try to batch several events at once. */
684                         age = (now - ifa->ifa_tstamp +
685                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
686
687                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
688                             age >= ifa->ifa_valid_lft) {
689                                 change_needed = true;
690                         } else if (ifa->ifa_preferred_lft ==
691                                    INFINITY_LIFE_TIME) {
692                                 continue;
693                         } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime is over: the next
                                 * deadline is the end of the valid lifetime.
                                 */
694                                 if (time_before(ifa->ifa_tstamp +
695                                                 ifa->ifa_valid_lft * HZ, next))
696                                         next = ifa->ifa_tstamp +
697                                                ifa->ifa_valid_lft * HZ;
698
699                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
700                                         change_needed = true;
701                         } else if (time_before(ifa->ifa_tstamp +
702                                                ifa->ifa_preferred_lft * HZ,
703                                                next)) {
704                                 next = ifa->ifa_tstamp +
705                                        ifa->ifa_preferred_lft * HZ;
706                         }
707                 }
708                 rcu_read_unlock();
709                 if (!change_needed)
710                         continue;
                /* Pass 2: apply the changes with RTNL held. */
711                 rtnl_lock();
712                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
713                         unsigned long age;
714
715                         if (ifa->ifa_flags & IFA_F_PERMANENT)
716                                 continue;
717
718                         /* We try to batch several events at once. */
719                         age = (now - ifa->ifa_tstamp +
720                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
721
722                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
723                             age >= ifa->ifa_valid_lft) {
724                                 struct in_ifaddr **ifap;
725
                                /* Find the list link pointing at ifa, as
                                 * inet_del_ifa() needs it to unlink.
                                 */
726                                 for (ifap = &ifa->ifa_dev->ifa_list;
727                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
728                                         if (*ifap == ifa) {
729                                                 inet_del_ifa(ifa->ifa_dev,
730                                                              ifap, 1);
731                                                 break;
732                                         }
733                                 }
734                         } else if (ifa->ifa_preferred_lft !=
735                                    INFINITY_LIFE_TIME &&
736                                    age >= ifa->ifa_preferred_lft &&
737                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
738                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
739                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
740                         }
741                 }
742                 rtnl_unlock();
743         }
744
745         next_sec = round_jiffies_up(next);
746         next_sched = next;
747
748         /* If rounded timeout is accurate enough, accept it. */
749         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
750                 next_sched = next_sec;
751
752         now = jiffies;
753         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
754         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
755                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
756
757         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
758                         next_sched - now);
759 }
760
761 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
762                              __u32 prefered_lft)
763 {
764         unsigned long timeout;
765
766         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
767
768         timeout = addrconf_timeout_fixup(valid_lft, HZ);
769         if (addrconf_finite_timeout(timeout))
770                 ifa->ifa_valid_lft = timeout;
771         else
772                 ifa->ifa_flags |= IFA_F_PERMANENT;
773
774         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
775         if (addrconf_finite_timeout(timeout)) {
776                 if (timeout == 0)
777                         ifa->ifa_flags |= IFA_F_DEPRECATED;
778                 ifa->ifa_preferred_lft = timeout;
779         }
780         ifa->ifa_tstamp = jiffies;
781         if (!ifa->ifa_cstamp)
782                 ifa->ifa_cstamp = ifa->ifa_tstamp;
783 }
784
785 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
786                                        __u32 *pvalid_lft, __u32 *pprefered_lft,
787                                        struct netlink_ext_ack *extack)
788 {
789         struct nlattr *tb[IFA_MAX+1];
790         struct in_ifaddr *ifa;
791         struct ifaddrmsg *ifm;
792         struct net_device *dev;
793         struct in_device *in_dev;
794         int err;
795
796         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
797                           extack);
798         if (err < 0)
799                 goto errout;
800
801         ifm = nlmsg_data(nlh);
802         err = -EINVAL;
803         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
804                 goto errout;
805
806         dev = __dev_get_by_index(net, ifm->ifa_index);
807         err = -ENODEV;
808         if (!dev)
809                 goto errout;
810
811         in_dev = __in_dev_get_rtnl(dev);
812         err = -ENOBUFS;
813         if (!in_dev)
814                 goto errout;
815
816         ifa = inet_alloc_ifa();
817         if (!ifa)
818                 /*
819                  * A potential indev allocation can be left alive, it stays
820                  * assigned to its device and is destroy with it.
821                  */
822                 goto errout;
823
824         ipv4_devconf_setall(in_dev);
825         neigh_parms_data_state_setall(in_dev->arp_parms);
826         in_dev_hold(in_dev);
827
828         if (!tb[IFA_ADDRESS])
829                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
830
831         INIT_HLIST_NODE(&ifa->hash);
832         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
833         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
834         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
835                                          ifm->ifa_flags;
836         ifa->ifa_scope = ifm->ifa_scope;
837         ifa->ifa_dev = in_dev;
838
839         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
840         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
841
842         if (tb[IFA_BROADCAST])
843                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
844
845         if (tb[IFA_LABEL])
846                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
847         else
848                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
849
850         if (tb[IFA_RT_PRIORITY])
851                 ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
852
853         if (tb[IFA_CACHEINFO]) {
854                 struct ifa_cacheinfo *ci;
855
856                 ci = nla_data(tb[IFA_CACHEINFO]);
857                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
858                         err = -EINVAL;
859                         goto errout_free;
860                 }
861                 *pvalid_lft = ci->ifa_valid;
862                 *pprefered_lft = ci->ifa_prefered;
863         }
864
865         return ifa;
866
867 errout_free:
868         inet_free_ifa(ifa);
869 errout:
870         return ERR_PTR(err);
871 }
872
873 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
874 {
875         struct in_device *in_dev = ifa->ifa_dev;
876         struct in_ifaddr *ifa1, **ifap;
877
878         if (!ifa->ifa_local)
879                 return NULL;
880
881         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
882              ifap = &ifa1->ifa_next) {
883                 if (ifa1->ifa_mask == ifa->ifa_mask &&
884                     inet_ifa_match(ifa1->ifa_address, ifa) &&
885                     ifa1->ifa_local == ifa->ifa_local)
886                         return ifa1;
887         }
888         return NULL;
889 }
890
891 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
892                             struct netlink_ext_ack *extack)
893 {
894         struct net *net = sock_net(skb->sk);
895         struct in_ifaddr *ifa;
896         struct in_ifaddr *ifa_existing;
897         __u32 valid_lft = INFINITY_LIFE_TIME;
898         __u32 prefered_lft = INFINITY_LIFE_TIME;
899
900         ASSERT_RTNL();
901
902         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
903         if (IS_ERR(ifa))
904                 return PTR_ERR(ifa);
905
906         ifa_existing = find_matching_ifa(ifa);
907         if (!ifa_existing) {
908                 /* It would be best to check for !NLM_F_CREATE here but
909                  * userspace already relies on not having to provide this.
910                  */
911                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
912                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
913                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
914                                                true, ifa);
915
916                         if (ret < 0) {
917                                 inet_free_ifa(ifa);
918                                 return ret;
919                         }
920                 }
921                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
922                                          extack);
923         } else {
924                 u32 new_metric = ifa->ifa_rt_priority;
925
926                 inet_free_ifa(ifa);
927
928                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
929                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
930                         return -EEXIST;
931                 ifa = ifa_existing;
932
933                 if (ifa->ifa_rt_priority != new_metric) {
934                         fib_modify_prefix_metric(ifa, new_metric);
935                         ifa->ifa_rt_priority = new_metric;
936                 }
937
938                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
939                 cancel_delayed_work(&check_lifetime_work);
940                 queue_delayed_work(system_power_efficient_wq,
941                                 &check_lifetime_work, 0);
942                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
943         }
944         return 0;
945 }
946
947 /*
948  *      Determine a default network mask, based on the IP address.
949  */
950
951 static int inet_abc_len(__be32 addr)
952 {
953         int rc = -1;    /* Something else, probably a multicast. */
954
955         if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
956                 rc = 0;
957         else {
958                 __u32 haddr = ntohl(addr);
959                 if (IN_CLASSA(haddr))
960                         rc = 8;
961                 else if (IN_CLASSB(haddr))
962                         rc = 16;
963                 else if (IN_CLASSC(haddr))
964                         rc = 24;
965                 else if (IN_CLASSE(haddr))
966                         rc = 32;
967         }
968
969         return rc;
970 }
971
972
/*
 * devinet_ioctl - service the IPv4 interface-address ioctls.
 *
 * @net: network namespace in which the device name is looked up
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value yields -EINVAL
 * @ifr: the request.  ifr_name selects the device, optionally as
 *       "name:alias"; ifr_addr carries the address in or out; ifr_flags
 *       is used by SIOCSIFFLAGS.  ifr_name is forcibly NUL-terminated and
 *       ifr_addr is overwritten by the four "get" commands.
 *
 * All "set" commands (SIOCSIFFLAGS included) require CAP_NET_ADMIN in
 * net->user_ns; the four address-setting ones additionally require
 * ifr_addr.sin_family == AF_INET.  The device lookup and everything after
 * it run with the RTNL lock held.
 *
 * Returns 0 on success, otherwise a negative errno: -EINVAL, -EPERM,
 * -ENODEV (no such device), -EADDRNOTAVAIL (no matching address),
 * -ENOBUFS (address allocation failed), or whatever dev_change_flags() /
 * inet_set_ifa() return.
 */
973 int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
974 {
975         struct sockaddr_in sin_orig;
976         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
977         struct in_device *in_dev;
978         struct in_ifaddr **ifap = NULL;
979         struct in_ifaddr *ifa = NULL;
980         struct net_device *dev;
981         char *colon;
982         int ret = -EFAULT;
983         int tryaddrmatch = 0;
984
985         ifr->ifr_name[IFNAMSIZ - 1] = 0;
986
987         /* save original address for comparison */
988         memcpy(&sin_orig, sin, sizeof(*sin));
989
            /* Temporarily cut a ":alias" suffix so that dev_load() and the
             * device lookup below see the bare device name.
             */
990         colon = strchr(ifr->ifr_name, ':');
991         if (colon)
992                 *colon = 0;
993
994         dev_load(net, ifr->ifr_name);
995
996         switch (cmd) {
997         case SIOCGIFADDR:       /* Get interface address */
998         case SIOCGIFBRDADDR:    /* Get the broadcast address */
999         case SIOCGIFDSTADDR:    /* Get the destination address */
1000         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1001                 /* Note that these ioctls will not sleep,
1002                    so that we do not impose a lock.
1003                    One day we will be forced to put shlock here (I mean SMP)
1004                  */
1005                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
1006                 memset(sin, 0, sizeof(*sin));
1007                 sin->sin_family = AF_INET;
1008                 break;
1009
1010         case SIOCSIFFLAGS:
1011                 ret = -EPERM;
1012                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1013                         goto out;
1014                 break;
1015         case SIOCSIFADDR:       /* Set interface address (and family) */
1016         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1017         case SIOCSIFDSTADDR:    /* Set the destination address */
1018         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1019                 ret = -EPERM;
1020                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1021                         goto out;
1022                 ret = -EINVAL;
1023                 if (sin->sin_family != AF_INET)
1024                         goto out;
1025                 break;
1026         default:
1027                 ret = -EINVAL;
1028                 goto out;
1029         }
1030
1031         rtnl_lock();
1032
1033         ret = -ENODEV;
1034         dev = __dev_get_by_name(net, ifr->ifr_name);
1035         if (!dev)
1036                 goto done;
1037
             /* Put the ':' back: the ifa_label comparisons below use the
              * full name as passed in by the caller.
              */
1038         if (colon)
1039                 *colon = ':';
1040
1041         in_dev = __in_dev_get_rtnl(dev);
1042         if (in_dev) {
1043                 if (tryaddrmatch) {
1044                         /* Matthias Andree */
1045                         /* compare label and address (4.4BSD style) */
1046                         /* note: we only do this for a limited set of ioctls
1047                            and only if the original address family was AF_INET.
1048                            This is checked above. */
1049                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1050                              ifap = &ifa->ifa_next) {
1051                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1052                                     sin_orig.sin_addr.s_addr ==
1053                                                         ifa->ifa_local) {
1054                                         break; /* found */
1055                                 }
1056                         }
1057                 }
1058                 /* we didn't get a match, maybe the application is
1059                    4.3BSD-style and passed in junk so we fall back to
1060                    comparing just the label */
1061                 if (!ifa) {
1062                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1063                              ifap = &ifa->ifa_next)
1064                                 if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1065                                         break;
1066                 }
1067         }
1068
             /* Without a matching address only SIOCSIFADDR and SIOCSIFFLAGS
              * carry on; every other command dereferences ifa below.
              */
1069         ret = -EADDRNOTAVAIL;
1070         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1071                 goto done;
1072
1073         switch (cmd) {
1074         case SIOCGIFADDR:       /* Get interface address */
1075                 ret = 0;
1076                 sin->sin_addr.s_addr = ifa->ifa_local;
1077                 break;
1078
1079         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1080                 ret = 0;
1081                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1082                 break;
1083
1084         case SIOCGIFDSTADDR:    /* Get the destination address */
1085                 ret = 0;
1086                 sin->sin_addr.s_addr = ifa->ifa_address;
1087                 break;
1088
1089         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1090                 ret = 0;
1091                 sin->sin_addr.s_addr = ifa->ifa_mask;
1092                 break;
1093
1094         case SIOCSIFFLAGS:
                     /* Aliased name: act on the address rather than on the
                      * device; clearing IFF_UP deletes the address and the
                      * flags are not passed to dev_change_flags().
                      */
1095                 if (colon) {
1096                         ret = -EADDRNOTAVAIL;
1097                         if (!ifa)
1098                                 break;
1099                         ret = 0;
1100                         if (!(ifr->ifr_flags & IFF_UP))
1101                                 inet_del_ifa(in_dev, ifap, 1);
1102                         break;
1103                 }
1104                 ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1105                 break;
1106
1107         case SIOCSIFADDR:       /* Set interface address (and family) */
1108                 ret = -EINVAL;
1109                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1110                         break;
1111
                     /* No address yet: allocate a fresh one.  Otherwise the
                      * existing one is taken off the list and reused.
                      * NOTE(review): inet_del_ifa(..., 0) presumably unlinks
                      * without freeing, since ifa is still used afterwards;
                      * confirm against inet_del_ifa().
                      */
1112                 if (!ifa) {
1113                         ret = -ENOBUFS;
1114                         ifa = inet_alloc_ifa();
1115                         if (!ifa)
1116                                 break;
1117                         INIT_HLIST_NODE(&ifa->hash);
1118                         if (colon)
1119                                 memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1120                         else
1121                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1122                 } else {
1123                         ret = 0;
1124                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1125                                 break;
1126                         inet_del_ifa(in_dev, ifap, 0);
1127                         ifa->ifa_broadcast = 0;
1128                         ifa->ifa_scope = 0;
1129                 }
1130
1131                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1132
1133                 if (!(dev->flags & IFF_POINTOPOINT)) {
1134                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1135                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1136                         if ((dev->flags & IFF_BROADCAST) &&
1137                             ifa->ifa_prefixlen < 31)
1138                                 ifa->ifa_broadcast = ifa->ifa_address |
1139                                                      ~ifa->ifa_mask;
1140                 } else {
1141                         ifa->ifa_prefixlen = 32;
1142                         ifa->ifa_mask = inet_make_mask(32);
1143                 }
1144                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1145                 ret = inet_set_ifa(dev, ifa);
1146                 break;
1147
1148         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1149                 ret = 0;
1150                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1151                         inet_del_ifa(in_dev, ifap, 0);
1152                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1153                         inet_insert_ifa(ifa);
1154                 }
1155                 break;
1156
1157         case SIOCSIFDSTADDR:    /* Set the destination address */
1158                 ret = 0;
1159                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1160                         break;
1161                 ret = -EINVAL;
1162                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1163                         break;
1164                 ret = 0;
1165                 inet_del_ifa(in_dev, ifap, 0);
1166                 ifa->ifa_address = sin->sin_addr.s_addr;
1167                 inet_insert_ifa(ifa);
1168                 break;
1169
1170         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1171
1172                 /*
1173                  *      The mask we set must be legal.
1174                  */
1175                 ret = -EINVAL;
1176                 if (bad_mask(sin->sin_addr.s_addr, 0))
1177                         break;
1178                 ret = 0;
1179                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1180                         __be32 old_mask = ifa->ifa_mask;
1181                         inet_del_ifa(in_dev, ifap, 0);
1182                         ifa->ifa_mask = sin->sin_addr.s_addr;
1183                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1184
1185                         /* See if current broadcast address matches
1186                          * with current netmask, then recalculate
1187                          * the broadcast address. Otherwise it's a
1188                          * funny address, so don't touch it since
1189                          * the user seems to know what (s)he's doing...
1190                          */
1191                         if ((dev->flags & IFF_BROADCAST) &&
1192                             (ifa->ifa_prefixlen < 31) &&
1193                             (ifa->ifa_broadcast ==
1194                              (ifa->ifa_local|~old_mask))) {
1195                                 ifa->ifa_broadcast = (ifa->ifa_local |
1196                                                       ~sin->sin_addr.s_addr);
1197                         }
1198                         inet_insert_ifa(ifa);
1199                 }
1200                 break;
1201         }
1202 done:
1203         rtnl_unlock();
1204 out:
1205         return ret;
1206 }
1207
1208 static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1209 {
1210         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1211         struct in_ifaddr *ifa;
1212         struct ifreq ifr;
1213         int done = 0;
1214
1215         if (WARN_ON(size > sizeof(struct ifreq)))
1216                 goto out;
1217
1218         if (!in_dev)
1219                 goto out;
1220
1221         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1222                 if (!buf) {
1223                         done += size;
1224                         continue;
1225                 }
1226                 if (len < size)
1227                         break;
1228                 memset(&ifr, 0, sizeof(struct ifreq));
1229                 strcpy(ifr.ifr_name, ifa->ifa_label);
1230
1231                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1232                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1233                                                                 ifa->ifa_local;
1234
1235                 if (copy_to_user(buf + done, &ifr, size)) {
1236                         done = -EFAULT;
1237                         break;
1238                 }
1239                 len  -= size;
1240                 done += size;
1241         }
1242 out:
1243         return done;
1244 }
1245
1246 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1247                                  int scope)
1248 {
1249         for_primary_ifa(in_dev) {
1250                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1251                     ifa->ifa_scope <= scope)
1252                         return ifa->ifa_local;
1253         } endfor_ifa(in_dev);
1254
1255         return 0;
1256 }
1257
1258 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1259 {
1260         __be32 addr = 0;
1261         struct in_device *in_dev;
1262         struct net *net = dev_net(dev);
1263         int master_idx;
1264
1265         rcu_read_lock();
1266         in_dev = __in_dev_get_rcu(dev);
1267         if (!in_dev)
1268                 goto no_in_dev;
1269
1270         for_primary_ifa(in_dev) {
1271                 if (ifa->ifa_scope > scope)
1272                         continue;
1273                 if (!dst || inet_ifa_match(dst, ifa)) {
1274                         addr = ifa->ifa_local;
1275                         break;
1276                 }
1277                 if (!addr)
1278                         addr = ifa->ifa_local;
1279         } endfor_ifa(in_dev);
1280
1281         if (addr)
1282                 goto out_unlock;
1283 no_in_dev:
1284         master_idx = l3mdev_master_ifindex_rcu(dev);
1285
1286         /* For VRFs, the VRF device takes the place of the loopback device,
1287          * with addresses on it being preferred.  Note in such cases the
1288          * loopback device will be among the devices that fail the master_idx
1289          * equality check in the loop below.
1290          */
1291         if (master_idx &&
1292             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1293             (in_dev = __in_dev_get_rcu(dev))) {
1294                 addr = in_dev_select_addr(in_dev, scope);
1295                 if (addr)
1296                         goto out_unlock;
1297         }
1298
1299         /* Not loopback addresses on loopback should be preferred
1300            in this case. It is important that lo is the first interface
1301            in dev_base list.
1302          */
1303         for_each_netdev_rcu(net, dev) {
1304                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1305                         continue;
1306
1307                 in_dev = __in_dev_get_rcu(dev);
1308                 if (!in_dev)
1309                         continue;
1310
1311                 addr = in_dev_select_addr(in_dev, scope);
1312                 if (addr)
1313                         goto out_unlock;
1314         }
1315 out_unlock:
1316         rcu_read_unlock();
1317         return addr;
1318 }
1319 EXPORT_SYMBOL(inet_select_addr);
1320
1321 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1322                               __be32 local, int scope)
1323 {
1324         int same = 0;
1325         __be32 addr = 0;
1326
1327         for_ifa(in_dev) {
1328                 if (!addr &&
1329                     (local == ifa->ifa_local || !local) &&
1330                     ifa->ifa_scope <= scope) {
1331                         addr = ifa->ifa_local;
1332                         if (same)
1333                                 break;
1334                 }
1335                 if (!same) {
1336                         same = (!local || inet_ifa_match(local, ifa)) &&
1337                                 (!dst || inet_ifa_match(dst, ifa));
1338                         if (same && addr) {
1339                                 if (local || !dst)
1340                                         break;
1341                                 /* Is the selected addr into dst subnet? */
1342                                 if (inet_ifa_match(addr, ifa))
1343                                         break;
1344                                 /* No, then can we use new local src? */
1345                                 if (ifa->ifa_scope <= scope) {
1346                                         addr = ifa->ifa_local;
1347                                         break;
1348                                 }
1349                                 /* search for large dst subnet for addr */
1350                                 same = 0;
1351                         }
1352                 }
1353         } endfor_ifa(in_dev);
1354
1355         return same ? addr : 0;
1356 }
1357
1358 /*
1359  * Confirm that local IP address exists using wildcards:
1360  * - net: netns to check, cannot be NULL
1361  * - in_dev: only on this interface, NULL=any interface
1362  * - dst: only in the same subnet as dst, 0=any dst
1363  * - local: address, 0=autoselect the local address
1364  * - scope: maximum allowed scope value for the local address
1365  */
1366 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1367                          __be32 dst, __be32 local, int scope)
1368 {
1369         __be32 addr = 0;
1370         struct net_device *dev;
1371
1372         if (in_dev)
1373                 return confirm_addr_indev(in_dev, dst, local, scope);
1374
1375         rcu_read_lock();
1376         for_each_netdev_rcu(net, dev) {
1377                 in_dev = __in_dev_get_rcu(dev);
1378                 if (in_dev) {
1379                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1380                         if (addr)
1381                                 break;
1382                 }
1383         }
1384         rcu_read_unlock();
1385
1386         return addr;
1387 }
1388 EXPORT_SYMBOL(inet_confirm_addr);
1389
1390 /*
1391  *      Device notifier
1392  */
1393
1394 int register_inetaddr_notifier(struct notifier_block *nb)
1395 {
1396         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1397 }
1398 EXPORT_SYMBOL(register_inetaddr_notifier);
1399
1400 int unregister_inetaddr_notifier(struct notifier_block *nb)
1401 {
1402         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1403 }
1404 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1405
1406 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1407 {
1408         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1409 }
1410 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1411
1412 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1413 {
1414         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1415             nb);
1416 }
1417 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1418
1419 /* Rename ifa_labels for a device name change. Make some effort to preserve
1420  * existing alias numbering and to create unique labels if possible.
1421 */
1422 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1423 {
1424         struct in_ifaddr *ifa;
1425         int named = 0;
1426
1427         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1428                 char old[IFNAMSIZ], *dot;
1429
1430                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1431                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1432                 if (named++ == 0)
1433                         goto skip;
1434                 dot = strchr(old, ':');
1435                 if (!dot) {
1436                         sprintf(old, ":%d", named);
1437                         dot = old;
1438                 }
1439                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1440                         strcat(ifa->ifa_label, dot);
1441                 else
1442                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1443 skip:
1444                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1445         }
1446 }
1447
1448 static bool inetdev_valid_mtu(unsigned int mtu)
1449 {
1450         return mtu >= IPV4_MIN_MTU;
1451 }
1452
1453 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1454                                         struct in_device *in_dev)
1455
1456 {
1457         struct in_ifaddr *ifa;
1458
1459         for (ifa = in_dev->ifa_list; ifa;
1460              ifa = ifa->ifa_next) {
1461                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1462                          ifa->ifa_local, dev,
1463                          ifa->ifa_local, NULL,
1464                          dev->dev_addr, NULL);
1465         }
1466 }
1467
1468 /* Called only under RTNL semaphore */
1469
/*
 * inetdev_event - netdevice notifier callback for IPv4.
 *
 * Reacts to @event on the device extracted from @ptr.  While the device has
 * no in_device, only two events do anything: NETDEV_REGISTER creates one
 * (setting NOXFRM and NOPOLICY on loopback devices) and NETDEV_CHANGEMTU
 * creates one if the new MTU passes inetdev_valid_mtu().  Once an in_device
 * exists the event is dispatched through the switch below; note that several
 * cases fall through on purpose.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is reported through
 * notifier_from_errno().
 */
1470 static int inetdev_event(struct notifier_block *this, unsigned long event,
1471                          void *ptr)
1472 {
1473         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1474         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1475
1476         ASSERT_RTNL();
1477
1478         if (!in_dev) {
1479                 if (event == NETDEV_REGISTER) {
1480                         in_dev = inetdev_init(dev);
1481                         if (IS_ERR(in_dev))
1482                                 return notifier_from_errno(PTR_ERR(in_dev));
1483                         if (dev->flags & IFF_LOOPBACK) {
1484                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1485                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1486                         }
1487                 } else if (event == NETDEV_CHANGEMTU) {
1488                         /* Re-enabling IP */
1489                         if (inetdev_valid_mtu(dev->mtu))
1490                                 in_dev = inetdev_init(dev);
1491                 }
1492                 goto out;
1493         }
1494
1495         switch (event) {
1496         case NETDEV_REGISTER:
1497                 pr_debug("%s: bug\n", __func__);
1498                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1499                 break;
1500         case NETDEV_UP:
1501                 if (!inetdev_valid_mtu(dev->mtu))
1502                         break;
                     /* Loopback coming up: configure INADDR_LOOPBACK/8 with
                      * host scope and infinite lifetime on it.
                      */
1503                 if (dev->flags & IFF_LOOPBACK) {
1504                         struct in_ifaddr *ifa = inet_alloc_ifa();
1505
1506                         if (ifa) {
1507                                 INIT_HLIST_NODE(&ifa->hash);
1508                                 ifa->ifa_local =
1509                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1510                                 ifa->ifa_prefixlen = 8;
1511                                 ifa->ifa_mask = inet_make_mask(8);
1512                                 in_dev_hold(in_dev);
1513                                 ifa->ifa_dev = in_dev;
1514                                 ifa->ifa_scope = RT_SCOPE_HOST;
1515                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1516                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1517                                                  INFINITY_LIFE_TIME);
1518                                 ipv4_devconf_setall(in_dev);
1519                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1520                                 inet_insert_ifa(ifa);
1521                         }
1522                 }
1523                 ip_mc_up(in_dev);
1524                 /* fall through */
1525         case NETDEV_CHANGEADDR:
1526                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1527                         break;
1528                 /* fall through */
1529         case NETDEV_NOTIFY_PEERS:
1530                 /* Send gratuitous ARP to notify of link change */
1531                 inetdev_send_gratuitous_arp(dev, in_dev);
1532                 break;
1533         case NETDEV_DOWN:
1534                 ip_mc_down(in_dev);
1535                 break;
1536         case NETDEV_PRE_TYPE_CHANGE:
1537                 ip_mc_unmap(in_dev);
1538                 break;
1539         case NETDEV_POST_TYPE_CHANGE:
1540                 ip_mc_remap(in_dev);
1541                 break;
1542         case NETDEV_CHANGEMTU:
1543                 if (inetdev_valid_mtu(dev->mtu))
1544                         break;
1545                 /* disable IP when MTU is not enough */
1546                 /* fall through */
1547         case NETDEV_UNREGISTER:
1548                 inetdev_destroy(in_dev);
1549                 break;
1550         case NETDEV_CHANGENAME:
1551                 /* Do not notify about label change, this event is
1552                  * not interesting to applications using netlink.
1553                  */
                     /* NOTE(review): the comment above looks stale;
                      * inetdev_changename() as it stands in this file sends
                      * RTM_NEWADDR via rtmsg_ifa() for every address.
                      */
1554                 inetdev_changename(dev, in_dev);
1555
                     /* Re-register the sysctl entries after the rename
                      * (presumably because they are keyed by device name;
                      * confirm against devinet_sysctl_register()).
                      */
1556                 devinet_sysctl_unregister(in_dev);
1557                 devinet_sysctl_register(in_dev);
1558                 break;
1559         }
1560 out:
1561         return NOTIFY_DONE;
1562 }
1563
/* Notifier block whose callback is inetdev_event(). */
1564 static struct notifier_block ip_netdev_notifier = {
1565         .notifier_call = inetdev_event,
1566 };
1567
1568 static size_t inet_nlmsg_size(void)
1569 {
1570         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1571                + nla_total_size(4) /* IFA_ADDRESS */
1572                + nla_total_size(4) /* IFA_LOCAL */
1573                + nla_total_size(4) /* IFA_BROADCAST */
1574                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1575                + nla_total_size(4)  /* IFA_FLAGS */
1576                + nla_total_size(4)  /* IFA_RT_PRIORITY */
1577                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1578 }
1579
1580 static inline u32 cstamp_delta(unsigned long cstamp)
1581 {
1582         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1583 }
1584
1585 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1586                          unsigned long tstamp, u32 preferred, u32 valid)
1587 {
1588         struct ifa_cacheinfo ci;
1589
1590         ci.cstamp = cstamp_delta(cstamp);
1591         ci.tstamp = cstamp_delta(tstamp);
1592         ci.ifa_prefered = preferred;
1593         ci.ifa_valid = valid;
1594
1595         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1596 }
1597
1598 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1599                             struct inet_fill_args *args)
1600 {
1601         struct ifaddrmsg *ifm;
1602         struct nlmsghdr  *nlh;
1603         u32 preferred, valid;
1604
1605         nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1606                         args->flags);
1607         if (!nlh)
1608                 return -EMSGSIZE;
1609
1610         ifm = nlmsg_data(nlh);
1611         ifm->ifa_family = AF_INET;
1612         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1613         ifm->ifa_flags = ifa->ifa_flags;
1614         ifm->ifa_scope = ifa->ifa_scope;
1615         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1616
1617         if (args->netnsid >= 0 &&
1618             nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1619                 goto nla_put_failure;
1620
1621         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1622                 preferred = ifa->ifa_preferred_lft;
1623                 valid = ifa->ifa_valid_lft;
1624                 if (preferred != INFINITY_LIFE_TIME) {
1625                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1626
1627                         if (preferred > tval)
1628                                 preferred -= tval;
1629                         else
1630                                 preferred = 0;
1631                         if (valid != INFINITY_LIFE_TIME) {
1632                                 if (valid > tval)
1633                                         valid -= tval;
1634                                 else
1635                                         valid = 0;
1636                         }
1637                 }
1638         } else {
1639                 preferred = INFINITY_LIFE_TIME;
1640                 valid = INFINITY_LIFE_TIME;
1641         }
1642         if ((ifa->ifa_address &&
1643              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1644             (ifa->ifa_local &&
1645              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1646             (ifa->ifa_broadcast &&
1647              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1648             (ifa->ifa_label[0] &&
1649              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1650             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1651             (ifa->ifa_rt_priority &&
1652              nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1653             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1654                           preferred, valid))
1655                 goto nla_put_failure;
1656
1657         nlmsg_end(skb, nlh);
1658         return 0;
1659
1660 nla_put_failure:
1661         nlmsg_cancel(skb, nlh);
1662         return -EMSGSIZE;
1663 }
1664
1665 static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1666                                       struct inet_fill_args *fillargs,
1667                                       struct net **tgt_net, struct sock *sk,
1668                                       struct netlink_callback *cb)
1669 {
1670         struct netlink_ext_ack *extack = cb->extack;
1671         struct nlattr *tb[IFA_MAX+1];
1672         struct ifaddrmsg *ifm;
1673         int err, i;
1674
1675         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1676                 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1677                 return -EINVAL;
1678         }
1679
1680         ifm = nlmsg_data(nlh);
1681         if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1682                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1683                 return -EINVAL;
1684         }
1685
1686         fillargs->ifindex = ifm->ifa_index;
1687         if (fillargs->ifindex) {
1688                 cb->answer_flags |= NLM_F_DUMP_FILTERED;
1689                 fillargs->flags |= NLM_F_DUMP_FILTERED;
1690         }
1691
1692         err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1693                                  ifa_ipv4_policy, extack);
1694         if (err < 0)
1695                 return err;
1696
1697         for (i = 0; i <= IFA_MAX; ++i) {
1698                 if (!tb[i])
1699                         continue;
1700
1701                 if (i == IFA_TARGET_NETNSID) {
1702                         struct net *net;
1703
1704                         fillargs->netnsid = nla_get_s32(tb[i]);
1705
1706                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1707                         if (IS_ERR(net)) {
1708                                 fillargs->netnsid = -1;
1709                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1710                                 return PTR_ERR(net);
1711                         }
1712                         *tgt_net = net;
1713                 } else {
1714                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1715                         return -EINVAL;
1716                 }
1717         }
1718
1719         return 0;
1720 }
1721
1722 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1723                             struct netlink_callback *cb, int s_ip_idx,
1724                             struct inet_fill_args *fillargs)
1725 {
1726         struct in_ifaddr *ifa;
1727         int ip_idx = 0;
1728         int err;
1729
1730         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next, ip_idx++) {
1731                 if (ip_idx < s_ip_idx)
1732                         continue;
1733
1734                 err = inet_fill_ifaddr(skb, ifa, fillargs);
1735                 if (err < 0)
1736                         goto done;
1737
1738                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1739         }
1740         err = 0;
1741
1742 done:
1743         cb->args[2] = ip_idx;
1744
1745         return err;
1746 }
1747
1748 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1749 {
1750         const struct nlmsghdr *nlh = cb->nlh;
1751         struct inet_fill_args fillargs = {
1752                 .portid = NETLINK_CB(cb->skb).portid,
1753                 .seq = nlh->nlmsg_seq,
1754                 .event = RTM_NEWADDR,
1755                 .flags = NLM_F_MULTI,
1756                 .netnsid = -1,
1757         };
1758         struct net *net = sock_net(skb->sk);
1759         struct net *tgt_net = net;
1760         int h, s_h;
1761         int idx, s_idx;
1762         int s_ip_idx;
1763         struct net_device *dev;
1764         struct in_device *in_dev;
1765         struct hlist_head *head;
1766         int err = 0;
1767
1768         s_h = cb->args[0];
1769         s_idx = idx = cb->args[1];
1770         s_ip_idx = cb->args[2];
1771
1772         if (cb->strict_check) {
1773                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1774                                                  skb->sk, cb);
1775                 if (err < 0)
1776                         goto put_tgt_net;
1777
1778                 err = 0;
1779                 if (fillargs.ifindex) {
1780                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1781                         if (!dev) {
1782                                 err = -ENODEV;
1783                                 goto put_tgt_net;
1784                         }
1785
1786                         in_dev = __in_dev_get_rtnl(dev);
1787                         if (in_dev) {
1788                                 err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1789                                                        &fillargs);
1790                         }
1791                         goto put_tgt_net;
1792                 }
1793         }
1794
1795         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1796                 idx = 0;
1797                 head = &tgt_net->dev_index_head[h];
1798                 rcu_read_lock();
1799                 cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1800                           tgt_net->dev_base_seq;
1801                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1802                         if (idx < s_idx)
1803                                 goto cont;
1804                         if (h > s_h || idx > s_idx)
1805                                 s_ip_idx = 0;
1806                         in_dev = __in_dev_get_rcu(dev);
1807                         if (!in_dev)
1808                                 goto cont;
1809
1810                         err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1811                                                &fillargs);
1812                         if (err < 0) {
1813                                 rcu_read_unlock();
1814                                 goto done;
1815                         }
1816 cont:
1817                         idx++;
1818                 }
1819                 rcu_read_unlock();
1820         }
1821
1822 done:
1823         cb->args[0] = h;
1824         cb->args[1] = idx;
1825 put_tgt_net:
1826         if (fillargs.netnsid >= 0)
1827                 put_net(tgt_net);
1828
1829         return skb->len ? : err;
1830 }
1831
1832 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1833                       u32 portid)
1834 {
1835         struct inet_fill_args fillargs = {
1836                 .portid = portid,
1837                 .seq = nlh ? nlh->nlmsg_seq : 0,
1838                 .event = event,
1839                 .flags = 0,
1840                 .netnsid = -1,
1841         };
1842         struct sk_buff *skb;
1843         int err = -ENOBUFS;
1844         struct net *net;
1845
1846         net = dev_net(ifa->ifa_dev->dev);
1847         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1848         if (!skb)
1849                 goto errout;
1850
1851         err = inet_fill_ifaddr(skb, ifa, &fillargs);
1852         if (err < 0) {
1853                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1854                 WARN_ON(err == -EMSGSIZE);
1855                 kfree_skb(skb);
1856                 goto errout;
1857         }
1858         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1859         return;
1860 errout:
1861         if (err < 0)
1862                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1863 }
1864
1865 static size_t inet_get_link_af_size(const struct net_device *dev,
1866                                     u32 ext_filter_mask)
1867 {
1868         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1869
1870         if (!in_dev)
1871                 return 0;
1872
1873         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1874 }
1875
1876 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1877                              u32 ext_filter_mask)
1878 {
1879         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1880         struct nlattr *nla;
1881         int i;
1882
1883         if (!in_dev)
1884                 return -ENODATA;
1885
1886         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1887         if (!nla)
1888                 return -EMSGSIZE;
1889
1890         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1891                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1892
1893         return 0;
1894 }
1895
1896 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1897         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1898 };
1899
1900 static int inet_validate_link_af(const struct net_device *dev,
1901                                  const struct nlattr *nla)
1902 {
1903         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1904         int err, rem;
1905
1906         if (dev && !__in_dev_get_rcu(dev))
1907                 return -EAFNOSUPPORT;
1908
1909         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1910         if (err < 0)
1911                 return err;
1912
1913         if (tb[IFLA_INET_CONF]) {
1914                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1915                         int cfgid = nla_type(a);
1916
1917                         if (nla_len(a) < 4)
1918                                 return -EINVAL;
1919
1920                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1921                                 return -EINVAL;
1922                 }
1923         }
1924
1925         return 0;
1926 }
1927
1928 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1929 {
1930         struct in_device *in_dev = __in_dev_get_rcu(dev);
1931         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1932         int rem;
1933
1934         if (!in_dev)
1935                 return -EAFNOSUPPORT;
1936
1937         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1938                 BUG();
1939
1940         if (tb[IFLA_INET_CONF]) {
1941                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1942                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1943         }
1944
1945         return 0;
1946 }
1947
1948 static int inet_netconf_msgsize_devconf(int type)
1949 {
1950         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1951                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1952         bool all = false;
1953
1954         if (type == NETCONFA_ALL)
1955                 all = true;
1956
1957         if (all || type == NETCONFA_FORWARDING)
1958                 size += nla_total_size(4);
1959         if (all || type == NETCONFA_RP_FILTER)
1960                 size += nla_total_size(4);
1961         if (all || type == NETCONFA_MC_FORWARDING)
1962                 size += nla_total_size(4);
1963         if (all || type == NETCONFA_BC_FORWARDING)
1964                 size += nla_total_size(4);
1965         if (all || type == NETCONFA_PROXY_NEIGH)
1966                 size += nla_total_size(4);
1967         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1968                 size += nla_total_size(4);
1969
1970         return size;
1971 }
1972
1973 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1974                                      struct ipv4_devconf *devconf, u32 portid,
1975                                      u32 seq, int event, unsigned int flags,
1976                                      int type)
1977 {
1978         struct nlmsghdr  *nlh;
1979         struct netconfmsg *ncm;
1980         bool all = false;
1981
1982         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1983                         flags);
1984         if (!nlh)
1985                 return -EMSGSIZE;
1986
1987         if (type == NETCONFA_ALL)
1988                 all = true;
1989
1990         ncm = nlmsg_data(nlh);
1991         ncm->ncm_family = AF_INET;
1992
1993         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1994                 goto nla_put_failure;
1995
1996         if (!devconf)
1997                 goto out;
1998
1999         if ((all || type == NETCONFA_FORWARDING) &&
2000             nla_put_s32(skb, NETCONFA_FORWARDING,
2001                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2002                 goto nla_put_failure;
2003         if ((all || type == NETCONFA_RP_FILTER) &&
2004             nla_put_s32(skb, NETCONFA_RP_FILTER,
2005                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2006                 goto nla_put_failure;
2007         if ((all || type == NETCONFA_MC_FORWARDING) &&
2008             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2009                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2010                 goto nla_put_failure;
2011         if ((all || type == NETCONFA_BC_FORWARDING) &&
2012             nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2013                         IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2014                 goto nla_put_failure;
2015         if ((all || type == NETCONFA_PROXY_NEIGH) &&
2016             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2017                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2018                 goto nla_put_failure;
2019         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2020             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2021                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2022                 goto nla_put_failure;
2023
2024 out:
2025         nlmsg_end(skb, nlh);
2026         return 0;
2027
2028 nla_put_failure:
2029         nlmsg_cancel(skb, nlh);
2030         return -EMSGSIZE;
2031 }
2032
2033 void inet_netconf_notify_devconf(struct net *net, int event, int type,
2034                                  int ifindex, struct ipv4_devconf *devconf)
2035 {
2036         struct sk_buff *skb;
2037         int err = -ENOBUFS;
2038
2039         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2040         if (!skb)
2041                 goto errout;
2042
2043         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2044                                         event, 0, type);
2045         if (err < 0) {
2046                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2047                 WARN_ON(err == -EMSGSIZE);
2048                 kfree_skb(skb);
2049                 goto errout;
2050         }
2051         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2052         return;
2053 errout:
2054         if (err < 0)
2055                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2056 }
2057
2058 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2059         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
2060         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
2061         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
2062         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
2063         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
2064 };
2065
2066 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2067                                     struct nlmsghdr *nlh,
2068                                     struct netlink_ext_ack *extack)
2069 {
2070         struct net *net = sock_net(in_skb->sk);
2071         struct nlattr *tb[NETCONFA_MAX+1];
2072         struct netconfmsg *ncm;
2073         struct sk_buff *skb;
2074         struct ipv4_devconf *devconf;
2075         struct in_device *in_dev;
2076         struct net_device *dev;
2077         int ifindex;
2078         int err;
2079
2080         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
2081                           devconf_ipv4_policy, extack);
2082         if (err < 0)
2083                 goto errout;
2084
2085         err = -EINVAL;
2086         if (!tb[NETCONFA_IFINDEX])
2087                 goto errout;
2088
2089         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2090         switch (ifindex) {
2091         case NETCONFA_IFINDEX_ALL:
2092                 devconf = net->ipv4.devconf_all;
2093                 break;
2094         case NETCONFA_IFINDEX_DEFAULT:
2095                 devconf = net->ipv4.devconf_dflt;
2096                 break;
2097         default:
2098                 dev = __dev_get_by_index(net, ifindex);
2099                 if (!dev)
2100                         goto errout;
2101                 in_dev = __in_dev_get_rtnl(dev);
2102                 if (!in_dev)
2103                         goto errout;
2104                 devconf = &in_dev->cnf;
2105                 break;
2106         }
2107
2108         err = -ENOBUFS;
2109         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2110         if (!skb)
2111                 goto errout;
2112
2113         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2114                                         NETLINK_CB(in_skb).portid,
2115                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2116                                         NETCONFA_ALL);
2117         if (err < 0) {
2118                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2119                 WARN_ON(err == -EMSGSIZE);
2120                 kfree_skb(skb);
2121                 goto errout;
2122         }
2123         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2124 errout:
2125         return err;
2126 }
2127
2128 static int inet_netconf_dump_devconf(struct sk_buff *skb,
2129                                      struct netlink_callback *cb)
2130 {
2131         const struct nlmsghdr *nlh = cb->nlh;
2132         struct net *net = sock_net(skb->sk);
2133         int h, s_h;
2134         int idx, s_idx;
2135         struct net_device *dev;
2136         struct in_device *in_dev;
2137         struct hlist_head *head;
2138
2139         if (cb->strict_check) {
2140                 struct netlink_ext_ack *extack = cb->extack;
2141                 struct netconfmsg *ncm;
2142
2143                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2144                         NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2145                         return -EINVAL;
2146                 }
2147
2148                 if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2149                         NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2150                         return -EINVAL;
2151                 }
2152         }
2153
2154         s_h = cb->args[0];
2155         s_idx = idx = cb->args[1];
2156
2157         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2158                 idx = 0;
2159                 head = &net->dev_index_head[h];
2160                 rcu_read_lock();
2161                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2162                           net->dev_base_seq;
2163                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
2164                         if (idx < s_idx)
2165                                 goto cont;
2166                         in_dev = __in_dev_get_rcu(dev);
2167                         if (!in_dev)
2168                                 goto cont;
2169
2170                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
2171                                                       &in_dev->cnf,
2172                                                       NETLINK_CB(cb->skb).portid,
2173                                                       nlh->nlmsg_seq,
2174                                                       RTM_NEWNETCONF,
2175                                                       NLM_F_MULTI,
2176                                                       NETCONFA_ALL) < 0) {
2177                                 rcu_read_unlock();
2178                                 goto done;
2179                         }
2180                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2181 cont:
2182                         idx++;
2183                 }
2184                 rcu_read_unlock();
2185         }
2186         if (h == NETDEV_HASHENTRIES) {
2187                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2188                                               net->ipv4.devconf_all,
2189                                               NETLINK_CB(cb->skb).portid,
2190                                               nlh->nlmsg_seq,
2191                                               RTM_NEWNETCONF, NLM_F_MULTI,
2192                                               NETCONFA_ALL) < 0)
2193                         goto done;
2194                 else
2195                         h++;
2196         }
2197         if (h == NETDEV_HASHENTRIES + 1) {
2198                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2199                                               net->ipv4.devconf_dflt,
2200                                               NETLINK_CB(cb->skb).portid,
2201                                               nlh->nlmsg_seq,
2202                                               RTM_NEWNETCONF, NLM_F_MULTI,
2203                                               NETCONFA_ALL) < 0)
2204                         goto done;
2205                 else
2206                         h++;
2207         }
2208 done:
2209         cb->args[0] = h;
2210         cb->args[1] = idx;
2211
2212         return skb->len;
2213 }
2214
2215 #ifdef CONFIG_SYSCTL
2216
2217 static void devinet_copy_dflt_conf(struct net *net, int i)
2218 {
2219         struct net_device *dev;
2220
2221         rcu_read_lock();
2222         for_each_netdev_rcu(net, dev) {
2223                 struct in_device *in_dev;
2224
2225                 in_dev = __in_dev_get_rcu(dev);
2226                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2227                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2228         }
2229         rcu_read_unlock();
2230 }
2231
2232 /* called with RTNL locked */
2233 static void inet_forward_change(struct net *net)
2234 {
2235         struct net_device *dev;
2236         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2237
2238         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2239         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2240         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2241                                     NETCONFA_FORWARDING,
2242                                     NETCONFA_IFINDEX_ALL,
2243                                     net->ipv4.devconf_all);
2244         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2245                                     NETCONFA_FORWARDING,
2246                                     NETCONFA_IFINDEX_DEFAULT,
2247                                     net->ipv4.devconf_dflt);
2248
2249         for_each_netdev(net, dev) {
2250                 struct in_device *in_dev;
2251
2252                 if (on)
2253                         dev_disable_lro(dev);
2254
2255                 in_dev = __in_dev_get_rtnl(dev);
2256                 if (in_dev) {
2257                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2258                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2259                                                     NETCONFA_FORWARDING,
2260                                                     dev->ifindex, &in_dev->cnf);
2261                 }
2262         }
2263 }
2264
2265 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2266 {
2267         if (cnf == net->ipv4.devconf_dflt)
2268                 return NETCONFA_IFINDEX_DEFAULT;
2269         else if (cnf == net->ipv4.devconf_all)
2270                 return NETCONFA_IFINDEX_ALL;
2271         else {
2272                 struct in_device *idev
2273                         = container_of(cnf, struct in_device, cnf);
2274                 return idev->dev->ifindex;
2275         }
2276 }
2277
2278 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2279                              void __user *buffer,
2280                              size_t *lenp, loff_t *ppos)
2281 {
2282         int old_value = *(int *)ctl->data;
2283         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2284         int new_value = *(int *)ctl->data;
2285
2286         if (write) {
2287                 struct ipv4_devconf *cnf = ctl->extra1;
2288                 struct net *net = ctl->extra2;
2289                 int i = (int *)ctl->data - cnf->data;
2290                 int ifindex;
2291
2292                 set_bit(i, cnf->state);
2293
2294                 if (cnf == net->ipv4.devconf_dflt)
2295                         devinet_copy_dflt_conf(net, i);
2296                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2297                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2298                         if ((new_value == 0) && (old_value != 0))
2299                                 rt_cache_flush(net);
2300
2301                 if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2302                     new_value != old_value)
2303                         rt_cache_flush(net);
2304
2305                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2306                     new_value != old_value) {
2307                         ifindex = devinet_conf_ifindex(net, cnf);
2308                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2309                                                     NETCONFA_RP_FILTER,
2310                                                     ifindex, cnf);
2311                 }
2312                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2313                     new_value != old_value) {
2314                         ifindex = devinet_conf_ifindex(net, cnf);
2315                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2316                                                     NETCONFA_PROXY_NEIGH,
2317                                                     ifindex, cnf);
2318                 }
2319                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2320                     new_value != old_value) {
2321                         ifindex = devinet_conf_ifindex(net, cnf);
2322                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2323                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2324                                                     ifindex, cnf);
2325                 }
2326         }
2327
2328         return ret;
2329 }
2330
2331 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2332                                   void __user *buffer,
2333                                   size_t *lenp, loff_t *ppos)
2334 {
2335         int *valp = ctl->data;
2336         int val = *valp;
2337         loff_t pos = *ppos;
2338         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2339
2340         if (write && *valp != val) {
2341                 struct net *net = ctl->extra2;
2342
2343                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2344                         if (!rtnl_trylock()) {
2345                                 /* Restore the original values before restarting */
2346                                 *valp = val;
2347                                 *ppos = pos;
2348                                 return restart_syscall();
2349                         }
2350                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2351                                 inet_forward_change(net);
2352                         } else {
2353                                 struct ipv4_devconf *cnf = ctl->extra1;
2354                                 struct in_device *idev =
2355                                         container_of(cnf, struct in_device, cnf);
2356                                 if (*valp)
2357                                         dev_disable_lro(idev->dev);
2358                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2359                                                             NETCONFA_FORWARDING,
2360                                                             idev->dev->ifindex,
2361                                                             cnf);
2362                         }
2363                         rtnl_unlock();
2364                         rt_cache_flush(net);
2365                 } else
2366                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2367                                                     NETCONFA_FORWARDING,
2368                                                     NETCONFA_IFINDEX_DEFAULT,
2369                                                     net->ipv4.devconf_dflt);
2370         }
2371
2372         return ret;
2373 }
2374
2375 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2376                                 void __user *buffer,
2377                                 size_t *lenp, loff_t *ppos)
2378 {
2379         int *valp = ctl->data;
2380         int val = *valp;
2381         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2382         struct net *net = ctl->extra2;
2383
2384         if (write && *valp != val)
2385                 rt_cache_flush(net);
2386
2387         return ret;
2388 }
2389
/*
 * ctl_table initializer for devconf attribute @attr.  .data points at
 * slot IPV4_DEVCONF_<attr> - 1 of ipv4_devconf.data and .extra1 at
 * ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, label, perm, handler) \
	{ \
		.procname	= label, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, handler) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, ipv4_doint_and_flush)
2412
2413 static struct devinet_sysctl_table {
2414         struct ctl_table_header *sysctl_header;
2415         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2416 } devinet_sysctl = {
2417         .devinet_vars = {
2418                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2419                                              devinet_sysctl_forward),
2420                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2421                 DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2422
2423                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2424                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2425                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2426                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2427                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2428                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2429                                         "accept_source_route"),
2430                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2431                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2432                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2433                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2434                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2435                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2436                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2437                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2438                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2439                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2440                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2441                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2442                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2443                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2444                                         "force_igmp_version"),
2445                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2446                                         "igmpv2_unsolicited_report_interval"),
2447                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2448                                         "igmpv3_unsolicited_report_interval"),
2449                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2450                                         "ignore_routes_with_linkdown"),
2451                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2452                                         "drop_gratuitous_arp"),
2453
2454                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2455                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2456                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2457                                               "promote_secondaries"),
2458                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2459                                               "route_localnet"),
2460                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2461                                               "drop_unicast_in_l2_multicast"),
2462         },
2463 };
2464
2465 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2466                                      int ifindex, struct ipv4_devconf *p)
2467 {
2468         int i;
2469         struct devinet_sysctl_table *t;
2470         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2471
2472         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2473         if (!t)
2474                 goto out;
2475
2476         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2477                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2478                 t->devinet_vars[i].extra1 = p;
2479                 t->devinet_vars[i].extra2 = net;
2480         }
2481
2482         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2483
2484         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2485         if (!t->sysctl_header)
2486                 goto free;
2487
2488         p->sysctl = t;
2489
2490         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2491                                     ifindex, p);
2492         return 0;
2493
2494 free:
2495         kfree(t);
2496 out:
2497         return -ENOBUFS;
2498 }
2499
2500 static void __devinet_sysctl_unregister(struct net *net,
2501                                         struct ipv4_devconf *cnf, int ifindex)
2502 {
2503         struct devinet_sysctl_table *t = cnf->sysctl;
2504
2505         if (t) {
2506                 cnf->sysctl = NULL;
2507                 unregister_net_sysctl_table(t->sysctl_header);
2508                 kfree(t);
2509         }
2510
2511         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2512 }
2513
2514 static int devinet_sysctl_register(struct in_device *idev)
2515 {
2516         int err;
2517
2518         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2519                 return -EINVAL;
2520
2521         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2522         if (err)
2523                 return err;
2524         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2525                                         idev->dev->ifindex, &idev->cnf);
2526         if (err)
2527                 neigh_sysctl_unregister(idev->arp_parms);
2528         return err;
2529 }
2530
2531 static void devinet_sysctl_unregister(struct in_device *idev)
2532 {
2533         struct net *net = dev_net(idev->dev);
2534
2535         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2536         neigh_sysctl_unregister(idev->arp_parms);
2537 }
2538
2539 static struct ctl_table ctl_forward_entry[] = {
2540         {
2541                 .procname       = "ip_forward",
2542                 .data           = &ipv4_devconf.data[
2543                                         IPV4_DEVCONF_FORWARDING - 1],
2544                 .maxlen         = sizeof(int),
2545                 .mode           = 0644,
2546                 .proc_handler   = devinet_sysctl_forward,
2547                 .extra1         = &ipv4_devconf,
2548                 .extra2         = &init_net,
2549         },
2550         { },
2551 };
2552 #endif
2553
2554 static __net_init int devinet_init_net(struct net *net)
2555 {
2556         int err;
2557         struct ipv4_devconf *all, *dflt;
2558 #ifdef CONFIG_SYSCTL
2559         struct ctl_table *tbl = ctl_forward_entry;
2560         struct ctl_table_header *forw_hdr;
2561 #endif
2562
2563         err = -ENOMEM;
2564         all = &ipv4_devconf;
2565         dflt = &ipv4_devconf_dflt;
2566
2567         if (!net_eq(net, &init_net)) {
2568                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2569                 if (!all)
2570                         goto err_alloc_all;
2571
2572                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2573                 if (!dflt)
2574                         goto err_alloc_dflt;
2575
2576 #ifdef CONFIG_SYSCTL
2577                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2578                 if (!tbl)
2579                         goto err_alloc_ctl;
2580
2581                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2582                 tbl[0].extra1 = all;
2583                 tbl[0].extra2 = net;
2584 #endif
2585         }
2586
2587 #ifdef CONFIG_SYSCTL
2588         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2589         if (err < 0)
2590                 goto err_reg_all;
2591
2592         err = __devinet_sysctl_register(net, "default",
2593                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2594         if (err < 0)
2595                 goto err_reg_dflt;
2596
2597         err = -ENOMEM;
2598         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2599         if (!forw_hdr)
2600                 goto err_reg_ctl;
2601         net->ipv4.forw_hdr = forw_hdr;
2602 #endif
2603
2604         net->ipv4.devconf_all = all;
2605         net->ipv4.devconf_dflt = dflt;
2606         return 0;
2607
2608 #ifdef CONFIG_SYSCTL
2609 err_reg_ctl:
2610         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2611 err_reg_dflt:
2612         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2613 err_reg_all:
2614         if (tbl != ctl_forward_entry)
2615                 kfree(tbl);
2616 err_alloc_ctl:
2617 #endif
2618         if (dflt != &ipv4_devconf_dflt)
2619                 kfree(dflt);
2620 err_alloc_dflt:
2621         if (all != &ipv4_devconf)
2622                 kfree(all);
2623 err_alloc_all:
2624         return err;
2625 }
2626
2627 static __net_exit void devinet_exit_net(struct net *net)
2628 {
2629 #ifdef CONFIG_SYSCTL
2630         struct ctl_table *tbl;
2631
2632         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2633         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2634         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2635                                     NETCONFA_IFINDEX_DEFAULT);
2636         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2637                                     NETCONFA_IFINDEX_ALL);
2638         kfree(tbl);
2639 #endif
2640         kfree(net->ipv4.devconf_dflt);
2641         kfree(net->ipv4.devconf_all);
2642 }
2643
2644 static __net_initdata struct pernet_operations devinet_ops = {
2645         .init = devinet_init_net,
2646         .exit = devinet_exit_net,
2647 };
2648
2649 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2650         .family           = AF_INET,
2651         .fill_link_af     = inet_fill_link_af,
2652         .get_link_af_size = inet_get_link_af_size,
2653         .validate_link_af = inet_validate_link_af,
2654         .set_link_af      = inet_set_link_af,
2655 };
2656
2657 void __init devinet_init(void)
2658 {
2659         int i;
2660
2661         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2662                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2663
2664         register_pernet_subsys(&devinet_ops);
2665
2666         register_gifconf(PF_INET, inet_gifconf);
2667         register_netdevice_notifier(&ip_netdev_notifier);
2668
2669         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2670
2671         rtnl_af_register(&inet_af_ops);
2672
2673         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2674         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2675         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2676         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2677                       inet_netconf_dump_devconf, 0);
2678 }