tcp/dccp: fix other lockdep splats accessing ireq_opt
[sfrench/cifs-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <linux/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/sched/signal.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59 #include <linux/netconf.h>
60
61 #include <net/arp.h>
62 #include <net/ip.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67 #include <net/addrconf.h>
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
76                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
77         },
78 };
79
80 static struct ipv4_devconf ipv4_devconf_dflt = {
81         .data = {
82                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
86                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
87                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
88                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
89         },
90 };
91
/*
 * Access attribute @attr in the devconf_dflt table of namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not only a
 * plain identifier) expands with the intended precedence.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
94
95 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
96         [IFA_LOCAL]             = { .type = NLA_U32 },
97         [IFA_ADDRESS]           = { .type = NLA_U32 },
98         [IFA_BROADCAST]         = { .type = NLA_U32 },
99         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
100         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
101         [IFA_FLAGS]             = { .type = NLA_U32 },
102 };
103
104 #define IN4_ADDR_HSIZE_SHIFT    8
105 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
106
107 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
108
109 static u32 inet_addr_hash(const struct net *net, __be32 addr)
110 {
111         u32 val = (__force u32) addr ^ net_hash_mix(net);
112
113         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
114 }
115
116 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
117 {
118         u32 hash = inet_addr_hash(net, ifa->ifa_local);
119
120         ASSERT_RTNL();
121         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
122 }
123
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126         ASSERT_RTNL();
127         hlist_del_init_rcu(&ifa->hash);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         u32 hash = inet_addr_hash(net, addr);
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143
144         rcu_read_lock();
145         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
146                 if (ifa->ifa_local == addr) {
147                         struct net_device *dev = ifa->ifa_dev->dev;
148
149                         if (!net_eq(dev_net(dev), net))
150                                 continue;
151                         result = dev;
152                         break;
153                 }
154         }
155         if (!result) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         }
169         if (result && devref)
170                 dev_hold(result);
171         rcu_read_unlock();
172         return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL registration is a no-op that always succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	int err = 0;

	return err;
}

/* Without CONFIG_SYSCTL there is nothing to unregister. */
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         refcount_set(&in_dev->refcnt, 1);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329         struct in_ifaddr *promote = NULL;
330         struct in_ifaddr *ifa, *ifa1 = *ifap;
331         struct in_ifaddr *last_prim = in_dev->ifa_list;
332         struct in_ifaddr *prev_prom = NULL;
333         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335         ASSERT_RTNL();
336
337         if (in_dev->dead)
338                 goto no_promotions;
339
340         /* 1. Deleting primary ifaddr forces deletion all secondaries
341          * unless alias promotion is set
342          **/
343
344         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
345                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
346
347                 while ((ifa = *ifap1) != NULL) {
348                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
349                             ifa1->ifa_scope <= ifa->ifa_scope)
350                                 last_prim = ifa;
351
352                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
353                             ifa1->ifa_mask != ifa->ifa_mask ||
354                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
355                                 ifap1 = &ifa->ifa_next;
356                                 prev_prom = ifa;
357                                 continue;
358                         }
359
360                         if (!do_promote) {
361                                 inet_hash_remove(ifa);
362                                 *ifap1 = ifa->ifa_next;
363
364                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
365                                 blocking_notifier_call_chain(&inetaddr_chain,
366                                                 NETDEV_DOWN, ifa);
367                                 inet_free_ifa(ifa);
368                         } else {
369                                 promote = ifa;
370                                 break;
371                         }
372                 }
373         }
374
375         /* On promotion all secondaries from subnet are changing
376          * the primary IP, we must remove all their routes silently
377          * and later to add them back with new prefsrc. Do this
378          * while all addresses are on the device list.
379          */
380         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
381                 if (ifa1->ifa_mask == ifa->ifa_mask &&
382                     inet_ifa_match(ifa1->ifa_address, ifa))
383                         fib_del_ifaddr(ifa, ifa1);
384         }
385
386 no_promotions:
387         /* 2. Unlink it */
388
389         *ifap = ifa1->ifa_next;
390         inet_hash_remove(ifa1);
391
392         /* 3. Announce address deletion */
393
394         /* Send message first, then call notifier.
395            At first sight, FIB update triggered by notifier
396            will refer to already deleted ifaddr, that could confuse
397            netlink listeners. It is not true: look, gated sees
398            that route deleted and if it still thinks that ifaddr
399            is valid, it will try to restore deleted routes... Grr.
400            So that, this order is correct.
401          */
402         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
403         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
404
405         if (promote) {
406                 struct in_ifaddr *next_sec = promote->ifa_next;
407
408                 if (prev_prom) {
409                         prev_prom->ifa_next = promote->ifa_next;
410                         promote->ifa_next = last_prim->ifa_next;
411                         last_prim->ifa_next = promote;
412                 }
413
414                 promote->ifa_flags &= ~IFA_F_SECONDARY;
415                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
416                 blocking_notifier_call_chain(&inetaddr_chain,
417                                 NETDEV_UP, promote);
418                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
419                         if (ifa1->ifa_mask != ifa->ifa_mask ||
420                             !inet_ifa_match(ifa1->ifa_address, ifa))
421                                         continue;
422                         fib_add_ifaddr(ifa);
423                 }
424
425         }
426         if (destroy)
427                 inet_free_ifa(ifa1);
428 }
429
430 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
431                          int destroy)
432 {
433         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
434 }
435
/* Delayed work item whose handler is check_lifetime(), defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
439
440 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
441                              u32 portid)
442 {
443         struct in_device *in_dev = ifa->ifa_dev;
444         struct in_ifaddr *ifa1, **ifap, **last_primary;
445         struct in_validator_info ivi;
446         int ret;
447
448         ASSERT_RTNL();
449
450         if (!ifa->ifa_local) {
451                 inet_free_ifa(ifa);
452                 return 0;
453         }
454
455         ifa->ifa_flags &= ~IFA_F_SECONDARY;
456         last_primary = &in_dev->ifa_list;
457
458         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
459              ifap = &ifa1->ifa_next) {
460                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
461                     ifa->ifa_scope <= ifa1->ifa_scope)
462                         last_primary = &ifa1->ifa_next;
463                 if (ifa1->ifa_mask == ifa->ifa_mask &&
464                     inet_ifa_match(ifa1->ifa_address, ifa)) {
465                         if (ifa1->ifa_local == ifa->ifa_local) {
466                                 inet_free_ifa(ifa);
467                                 return -EEXIST;
468                         }
469                         if (ifa1->ifa_scope != ifa->ifa_scope) {
470                                 inet_free_ifa(ifa);
471                                 return -EINVAL;
472                         }
473                         ifa->ifa_flags |= IFA_F_SECONDARY;
474                 }
475         }
476
477         /* Allow any devices that wish to register ifaddr validtors to weigh
478          * in now, before changes are committed.  The rntl lock is serializing
479          * access here, so the state should not change between a validator call
480          * and a final notify on commit.  This isn't invoked on promotion under
481          * the assumption that validators are checking the address itself, and
482          * not the flags.
483          */
484         ivi.ivi_addr = ifa->ifa_address;
485         ivi.ivi_dev = ifa->ifa_dev;
486         ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
487                                            NETDEV_UP, &ivi);
488         ret = notifier_to_errno(ret);
489         if (ret) {
490                 inet_free_ifa(ifa);
491                 return ret;
492         }
493
494         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
495                 prandom_seed((__force u32) ifa->ifa_local);
496                 ifap = last_primary;
497         }
498
499         ifa->ifa_next = *ifap;
500         *ifap = ifa;
501
502         inet_hash_insert(dev_net(in_dev->dev), ifa);
503
504         cancel_delayed_work(&check_lifetime_work);
505         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
506
507         /* Send message first, then call notifier.
508            Notifier will trigger FIB update, so that
509            listeners of netlink will know about new ifaddr */
510         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
511         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
512
513         return 0;
514 }
515
516 static int inet_insert_ifa(struct in_ifaddr *ifa)
517 {
518         return __inet_insert_ifa(ifa, NULL, 0);
519 }
520
521 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
522 {
523         struct in_device *in_dev = __in_dev_get_rtnl(dev);
524
525         ASSERT_RTNL();
526
527         if (!in_dev) {
528                 inet_free_ifa(ifa);
529                 return -ENOBUFS;
530         }
531         ipv4_devconf_setall(in_dev);
532         neigh_parms_data_state_setall(in_dev->arp_parms);
533         if (ifa->ifa_dev != in_dev) {
534                 WARN_ON(ifa->ifa_dev);
535                 in_dev_hold(in_dev);
536                 ifa->ifa_dev = in_dev;
537         }
538         if (ipv4_is_loopback(ifa->ifa_local))
539                 ifa->ifa_scope = RT_SCOPE_HOST;
540         return inet_insert_ifa(ifa);
541 }
542
543 /* Caller must hold RCU or RTNL :
544  * We dont take a reference on found in_device
545  */
546 struct in_device *inetdev_by_index(struct net *net, int ifindex)
547 {
548         struct net_device *dev;
549         struct in_device *in_dev = NULL;
550
551         rcu_read_lock();
552         dev = dev_get_by_index_rcu(net, ifindex);
553         if (dev)
554                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
555         rcu_read_unlock();
556         return in_dev;
557 }
558 EXPORT_SYMBOL(inetdev_by_index);
559
560 /* Called only from RTNL semaphored context. No locks. */
561
562 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
563                                     __be32 mask)
564 {
565         ASSERT_RTNL();
566
567         for_primary_ifa(in_dev) {
568                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
569                         return ifa;
570         } endfor_ifa(in_dev);
571         return NULL;
572 }
573
574 static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
575 {
576         struct ip_mreqn mreq = {
577                 .imr_multiaddr.s_addr = ifa->ifa_address,
578                 .imr_ifindex = ifa->ifa_dev->dev->ifindex,
579         };
580         int ret;
581
582         ASSERT_RTNL();
583
584         lock_sock(sk);
585         if (join)
586                 ret = ip_mc_join_group(sk, &mreq);
587         else
588                 ret = ip_mc_leave_group(sk, &mreq);
589         release_sock(sk);
590
591         return ret;
592 }
593
594 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
595                             struct netlink_ext_ack *extack)
596 {
597         struct net *net = sock_net(skb->sk);
598         struct nlattr *tb[IFA_MAX+1];
599         struct in_device *in_dev;
600         struct ifaddrmsg *ifm;
601         struct in_ifaddr *ifa, **ifap;
602         int err = -EINVAL;
603
604         ASSERT_RTNL();
605
606         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
607                           extack);
608         if (err < 0)
609                 goto errout;
610
611         ifm = nlmsg_data(nlh);
612         in_dev = inetdev_by_index(net, ifm->ifa_index);
613         if (!in_dev) {
614                 err = -ENODEV;
615                 goto errout;
616         }
617
618         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
619              ifap = &ifa->ifa_next) {
620                 if (tb[IFA_LOCAL] &&
621                     ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
622                         continue;
623
624                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
625                         continue;
626
627                 if (tb[IFA_ADDRESS] &&
628                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
629                     !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
630                         continue;
631
632                 if (ipv4_is_multicast(ifa->ifa_address))
633                         ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
634                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
635                 return 0;
636         }
637
638         err = -EADDRNOTAVAIL;
639 errout:
640         return err;
641 }
642
643 #define INFINITY_LIFE_TIME      0xFFFFFFFF
644
645 static void check_lifetime(struct work_struct *work)
646 {
647         unsigned long now, next, next_sec, next_sched;
648         struct in_ifaddr *ifa;
649         struct hlist_node *n;
650         int i;
651
652         now = jiffies;
653         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
654
655         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
656                 bool change_needed = false;
657
658                 rcu_read_lock();
659                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
660                         unsigned long age;
661
662                         if (ifa->ifa_flags & IFA_F_PERMANENT)
663                                 continue;
664
665                         /* We try to batch several events at once. */
666                         age = (now - ifa->ifa_tstamp +
667                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
668
669                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
670                             age >= ifa->ifa_valid_lft) {
671                                 change_needed = true;
672                         } else if (ifa->ifa_preferred_lft ==
673                                    INFINITY_LIFE_TIME) {
674                                 continue;
675                         } else if (age >= ifa->ifa_preferred_lft) {
676                                 if (time_before(ifa->ifa_tstamp +
677                                                 ifa->ifa_valid_lft * HZ, next))
678                                         next = ifa->ifa_tstamp +
679                                                ifa->ifa_valid_lft * HZ;
680
681                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
682                                         change_needed = true;
683                         } else if (time_before(ifa->ifa_tstamp +
684                                                ifa->ifa_preferred_lft * HZ,
685                                                next)) {
686                                 next = ifa->ifa_tstamp +
687                                        ifa->ifa_preferred_lft * HZ;
688                         }
689                 }
690                 rcu_read_unlock();
691                 if (!change_needed)
692                         continue;
693                 rtnl_lock();
694                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
695                         unsigned long age;
696
697                         if (ifa->ifa_flags & IFA_F_PERMANENT)
698                                 continue;
699
700                         /* We try to batch several events at once. */
701                         age = (now - ifa->ifa_tstamp +
702                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
703
704                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
705                             age >= ifa->ifa_valid_lft) {
706                                 struct in_ifaddr **ifap;
707
708                                 for (ifap = &ifa->ifa_dev->ifa_list;
709                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
710                                         if (*ifap == ifa) {
711                                                 inet_del_ifa(ifa->ifa_dev,
712                                                              ifap, 1);
713                                                 break;
714                                         }
715                                 }
716                         } else if (ifa->ifa_preferred_lft !=
717                                    INFINITY_LIFE_TIME &&
718                                    age >= ifa->ifa_preferred_lft &&
719                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
720                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
721                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
722                         }
723                 }
724                 rtnl_unlock();
725         }
726
727         next_sec = round_jiffies_up(next);
728         next_sched = next;
729
730         /* If rounded timeout is accurate enough, accept it. */
731         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
732                 next_sched = next_sec;
733
734         now = jiffies;
735         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
736         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
737                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
738
739         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
740                         next_sched - now);
741 }
742
743 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
744                              __u32 prefered_lft)
745 {
746         unsigned long timeout;
747
748         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
749
750         timeout = addrconf_timeout_fixup(valid_lft, HZ);
751         if (addrconf_finite_timeout(timeout))
752                 ifa->ifa_valid_lft = timeout;
753         else
754                 ifa->ifa_flags |= IFA_F_PERMANENT;
755
756         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
757         if (addrconf_finite_timeout(timeout)) {
758                 if (timeout == 0)
759                         ifa->ifa_flags |= IFA_F_DEPRECATED;
760                 ifa->ifa_preferred_lft = timeout;
761         }
762         ifa->ifa_tstamp = jiffies;
763         if (!ifa->ifa_cstamp)
764                 ifa->ifa_cstamp = ifa->ifa_tstamp;
765 }
766
767 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
768                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
769 {
770         struct nlattr *tb[IFA_MAX+1];
771         struct in_ifaddr *ifa;
772         struct ifaddrmsg *ifm;
773         struct net_device *dev;
774         struct in_device *in_dev;
775         int err;
776
777         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy,
778                           NULL);
779         if (err < 0)
780                 goto errout;
781
782         ifm = nlmsg_data(nlh);
783         err = -EINVAL;
784         if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
785                 goto errout;
786
787         dev = __dev_get_by_index(net, ifm->ifa_index);
788         err = -ENODEV;
789         if (!dev)
790                 goto errout;
791
792         in_dev = __in_dev_get_rtnl(dev);
793         err = -ENOBUFS;
794         if (!in_dev)
795                 goto errout;
796
797         ifa = inet_alloc_ifa();
798         if (!ifa)
799                 /*
800                  * A potential indev allocation can be left alive, it stays
801                  * assigned to its device and is destroy with it.
802                  */
803                 goto errout;
804
805         ipv4_devconf_setall(in_dev);
806         neigh_parms_data_state_setall(in_dev->arp_parms);
807         in_dev_hold(in_dev);
808
809         if (!tb[IFA_ADDRESS])
810                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
811
812         INIT_HLIST_NODE(&ifa->hash);
813         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
814         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
815         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
816                                          ifm->ifa_flags;
817         ifa->ifa_scope = ifm->ifa_scope;
818         ifa->ifa_dev = in_dev;
819
820         ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
821         ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
822
823         if (tb[IFA_BROADCAST])
824                 ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
825
826         if (tb[IFA_LABEL])
827                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
828         else
829                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
830
831         if (tb[IFA_CACHEINFO]) {
832                 struct ifa_cacheinfo *ci;
833
834                 ci = nla_data(tb[IFA_CACHEINFO]);
835                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
836                         err = -EINVAL;
837                         goto errout_free;
838                 }
839                 *pvalid_lft = ci->ifa_valid;
840                 *pprefered_lft = ci->ifa_prefered;
841         }
842
843         return ifa;
844
845 errout_free:
846         inet_free_ifa(ifa);
847 errout:
848         return ERR_PTR(err);
849 }
850
851 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
852 {
853         struct in_device *in_dev = ifa->ifa_dev;
854         struct in_ifaddr *ifa1, **ifap;
855
856         if (!ifa->ifa_local)
857                 return NULL;
858
859         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
860              ifap = &ifa1->ifa_next) {
861                 if (ifa1->ifa_mask == ifa->ifa_mask &&
862                     inet_ifa_match(ifa1->ifa_address, ifa) &&
863                     ifa1->ifa_local == ifa->ifa_local)
864                         return ifa1;
865         }
866         return NULL;
867 }
868
869 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
870                             struct netlink_ext_ack *extack)
871 {
872         struct net *net = sock_net(skb->sk);
873         struct in_ifaddr *ifa;
874         struct in_ifaddr *ifa_existing;
875         __u32 valid_lft = INFINITY_LIFE_TIME;
876         __u32 prefered_lft = INFINITY_LIFE_TIME;
877
878         ASSERT_RTNL();
879
880         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
881         if (IS_ERR(ifa))
882                 return PTR_ERR(ifa);
883
884         ifa_existing = find_matching_ifa(ifa);
885         if (!ifa_existing) {
886                 /* It would be best to check for !NLM_F_CREATE here but
887                  * userspace already relies on not having to provide this.
888                  */
889                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
890                 if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
891                         int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
892                                                true, ifa);
893
894                         if (ret < 0) {
895                                 inet_free_ifa(ifa);
896                                 return ret;
897                         }
898                 }
899                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
900         } else {
901                 inet_free_ifa(ifa);
902
903                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
904                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
905                         return -EEXIST;
906                 ifa = ifa_existing;
907                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
908                 cancel_delayed_work(&check_lifetime_work);
909                 queue_delayed_work(system_power_efficient_wq,
910                                 &check_lifetime_work, 0);
911                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
912         }
913         return 0;
914 }
915
916 /*
917  *      Determine a default network mask, based on the IP address.
918  */
919
920 static int inet_abc_len(__be32 addr)
921 {
922         int rc = -1;    /* Something else, probably a multicast. */
923
924         if (ipv4_is_zeronet(addr))
925                 rc = 0;
926         else {
927                 __u32 haddr = ntohl(addr);
928
929                 if (IN_CLASSA(haddr))
930                         rc = 8;
931                 else if (IN_CLASSB(haddr))
932                         rc = 16;
933                 else if (IN_CLASSC(haddr))
934                         rc = 24;
935         }
936
937         return rc;
938 }
939
940
/*
 * devinet_ioctl - handle the IPv4 interface address ioctls.
 *
 * Handles SIOCGIF{ADDR,BRDADDR,DSTADDR,NETMASK}, the matching SIOCSIF*
 * commands and SIOCSIFFLAGS.  @arg points to a user-space struct ifreq which
 * is copied in and, for the "get" commands, copied back with the result.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT         copying the ifreq from/to user space failed
 *   -EPERM          a "set" command without CAP_NET_ADMIN in net->user_ns
 *   -EINVAL         unknown command, non-AF_INET family on a "set" command,
 *                   or an address/mask rejected by inet_abc_len()/bad_mask()
 *   -ENODEV         no device with the requested name
 *   -EADDRNOTAVAIL  no matching address (except where one may be created)
 *   -ENOBUFS        address allocation failed
 */
941 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
942 {
943         struct ifreq ifr;
944         struct sockaddr_in sin_orig;
945         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
946         struct in_device *in_dev;
947         struct in_ifaddr **ifap = NULL;
948         struct in_ifaddr *ifa = NULL;
949         struct net_device *dev;
950         char *colon;
951         int ret = -EFAULT;
952         int tryaddrmatch = 0;
953
954         /*
955          *      Fetch the caller's info block into kernel space
956          */
957
958         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
959                 goto out;
            /* Force NUL termination of the user-supplied name. */
960         ifr.ifr_name[IFNAMSIZ - 1] = 0;
961
962         /* save original address for comparison */
963         memcpy(&sin_orig, sin, sizeof(*sin));
964
            /* Temporarily cut an alias label "dev:alias" down to "dev" so the
             * device lookups below use the base name; the colon is put back
             * once the device has been found.
             */
965         colon = strchr(ifr.ifr_name, ':');
966         if (colon)
967                 *colon = 0;
968
969         dev_load(net, ifr.ifr_name);
970
            /* First pass over cmd: permission and argument checks only. */
971         switch (cmd) {
972         case SIOCGIFADDR:       /* Get interface address */
973         case SIOCGIFBRDADDR:    /* Get the broadcast address */
974         case SIOCGIFDSTADDR:    /* Get the destination address */
975         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
976                 /* Note that these ioctls will not sleep,
977                    so that we do not impose a lock.
978                    One day we will be forced to put shlock here (I mean SMP)
979                  */
980                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
981                 memset(sin, 0, sizeof(*sin));
982                 sin->sin_family = AF_INET;
983                 break;
984
985         case SIOCSIFFLAGS:
986                 ret = -EPERM;
987                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
988                         goto out;
989                 break;
990         case SIOCSIFADDR:       /* Set interface address (and family) */
991         case SIOCSIFBRDADDR:    /* Set the broadcast address */
992         case SIOCSIFDSTADDR:    /* Set the destination address */
993         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
994                 ret = -EPERM;
995                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
996                         goto out;
997                 ret = -EINVAL;
998                 if (sin->sin_family != AF_INET)
999                         goto out;
1000                 break;
1001         default:
1002                 ret = -EINVAL;
1003                 goto out;
1004         }
1005
1006         rtnl_lock();
1007
1008         ret = -ENODEV;
1009         dev = __dev_get_by_name(net, ifr.ifr_name);
1010         if (!dev)
1011                 goto done;
1012
             /* Restore the full label for the label comparisons below. */
1013         if (colon)
1014                 *colon = ':';
1015
1016         in_dev = __in_dev_get_rtnl(dev);
1017         if (in_dev) {
1018                 if (tryaddrmatch) {
1019                         /* Matthias Andree */
1020                         /* compare label and address (4.4BSD style) */
1021                         /* note: we only do this for a limited set of ioctls
1022                            and only if the original address family was AF_INET.
1023                            This is checked above. */
1024                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1025                              ifap = &ifa->ifa_next) {
1026                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
1027                                     sin_orig.sin_addr.s_addr ==
1028                                                         ifa->ifa_local) {
1029                                         break; /* found */
1030                                 }
1031                         }
1032                 }
1033                 /* we didn't get a match, maybe the application is
1034                    4.3BSD-style and passed in junk so we fall back to
1035                    comparing just the label */
1036                 if (!ifa) {
1037                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
1038                              ifap = &ifa->ifa_next)
1039                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
1040                                         break;
1041                 }
1042         }
1043
             /* Only SIOCSIFADDR (which can allocate a new address) and
              * SIOCSIFFLAGS (which can act on the device) go on without a
              * matching address.
              */
1044         ret = -EADDRNOTAVAIL;
1045         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1046                 goto done;
1047
             /* Second pass over cmd: perform the operation.  The "get"
              * commands leave through rarok, which copies ifr back to the
              * caller.
              */
1048         switch (cmd) {
1049         case SIOCGIFADDR:       /* Get interface address */
1050                 sin->sin_addr.s_addr = ifa->ifa_local;
1051                 goto rarok;
1052
1053         case SIOCGIFBRDADDR:    /* Get the broadcast address */
1054                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
1055                 goto rarok;
1056
1057         case SIOCGIFDSTADDR:    /* Get the destination address */
1058                 sin->sin_addr.s_addr = ifa->ifa_address;
1059                 goto rarok;
1060
1061         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1062                 sin->sin_addr.s_addr = ifa->ifa_mask;
1063                 goto rarok;
1064
1065         case SIOCSIFFLAGS:
                     /* With an alias label, clearing IFF_UP deletes that
                      * address; without one the flags are applied to the
                      * device itself.
                      */
1066                 if (colon) {
1067                         ret = -EADDRNOTAVAIL;
1068                         if (!ifa)
1069                                 break;
1070                         ret = 0;
1071                         if (!(ifr.ifr_flags & IFF_UP))
1072                                 inet_del_ifa(in_dev, ifap, 1);
1073                         break;
1074                 }
1075                 ret = dev_change_flags(dev, ifr.ifr_flags);
1076                 break;
1077
1078         case SIOCSIFADDR:       /* Set interface address (and family) */
1079                 ret = -EINVAL;
1080                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1081                         break;
1082
1083                 if (!ifa) {
1084                         ret = -ENOBUFS;
1085                         ifa = inet_alloc_ifa();
1086                         if (!ifa)
1087                                 break;
1088                         INIT_HLIST_NODE(&ifa->hash);
1089                         if (colon)
1090                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1091                         else
1092                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1093                 } else {
1094                         ret = 0;
1095                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1096                                 break;
                             /* Unlink the existing entry (third argument 0),
                              * then reuse the same object for the new address.
                              */
1097                         inet_del_ifa(in_dev, ifap, 0);
1098                         ifa->ifa_broadcast = 0;
1099                         ifa->ifa_scope = 0;
1100                 }
1101
1102                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1103
                     /* Non point-to-point devices get the classful default
                      * prefix; point-to-point devices get a /32.
                      */
1104                 if (!(dev->flags & IFF_POINTOPOINT)) {
1105                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1106                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1107                         if ((dev->flags & IFF_BROADCAST) &&
1108                             ifa->ifa_prefixlen < 31)
1109                                 ifa->ifa_broadcast = ifa->ifa_address |
1110                                                      ~ifa->ifa_mask;
1111                 } else {
1112                         ifa->ifa_prefixlen = 32;
1113                         ifa->ifa_mask = inet_make_mask(32);
1114                 }
1115                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1116                 ret = inet_set_ifa(dev, ifa);
1117                 break;
1118
1119         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1120                 ret = 0;
1121                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1122                         inet_del_ifa(in_dev, ifap, 0);
1123                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1124                         inet_insert_ifa(ifa);
1125                 }
1126                 break;
1127
1128         case SIOCSIFDSTADDR:    /* Set the destination address */
1129                 ret = 0;
1130                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1131                         break;
1132                 ret = -EINVAL;
1133                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1134                         break;
1135                 ret = 0;
1136                 inet_del_ifa(in_dev, ifap, 0);
1137                 ifa->ifa_address = sin->sin_addr.s_addr;
1138                 inet_insert_ifa(ifa);
1139                 break;
1140
1141         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1142
1143                 /*
1144                  *      The mask we set must be legal.
1145                  */
1146                 ret = -EINVAL;
1147                 if (bad_mask(sin->sin_addr.s_addr, 0))
1148                         break;
1149                 ret = 0;
1150                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1151                         __be32 old_mask = ifa->ifa_mask;
1152                         inet_del_ifa(in_dev, ifap, 0);
1153                         ifa->ifa_mask = sin->sin_addr.s_addr;
1154                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1155
1156                         /* See if current broadcast address matches
1157                          * with current netmask, then recalculate
1158                          * the broadcast address. Otherwise it's a
1159                          * funny address, so don't touch it since
1160                          * the user seems to know what (s)he's doing...
1161                          */
1162                         if ((dev->flags & IFF_BROADCAST) &&
1163                             (ifa->ifa_prefixlen < 31) &&
1164                             (ifa->ifa_broadcast ==
1165                              (ifa->ifa_local|~old_mask))) {
1166                                 ifa->ifa_broadcast = (ifa->ifa_local |
1167                                                       ~sin->sin_addr.s_addr);
1168                         }
1169                         inet_insert_ifa(ifa);
1170                 }
1171                 break;
1172         }
1173 done:
1174         rtnl_unlock();
1175 out:
1176         return ret;
             /* "get" commands: release RTNL first, then copy the result out. */
1177 rarok:
1178         rtnl_unlock();
1179         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1180         goto out;
1181 }
1182
1183 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1184 {
1185         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1186         struct in_ifaddr *ifa;
1187         struct ifreq ifr;
1188         int done = 0;
1189
1190         if (!in_dev)
1191                 goto out;
1192
1193         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1194                 if (!buf) {
1195                         done += sizeof(ifr);
1196                         continue;
1197                 }
1198                 if (len < (int) sizeof(ifr))
1199                         break;
1200                 memset(&ifr, 0, sizeof(struct ifreq));
1201                 strcpy(ifr.ifr_name, ifa->ifa_label);
1202
1203                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1204                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1205                                                                 ifa->ifa_local;
1206
1207                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1208                         done = -EFAULT;
1209                         break;
1210                 }
1211                 buf  += sizeof(struct ifreq);
1212                 len  -= sizeof(struct ifreq);
1213                 done += sizeof(struct ifreq);
1214         }
1215 out:
1216         return done;
1217 }
1218
1219 static __be32 in_dev_select_addr(const struct in_device *in_dev,
1220                                  int scope)
1221 {
1222         for_primary_ifa(in_dev) {
1223                 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1224                     ifa->ifa_scope <= scope)
1225                         return ifa->ifa_local;
1226         } endfor_ifa(in_dev);
1227
1228         return 0;
1229 }
1230
1231 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1232 {
1233         __be32 addr = 0;
1234         struct in_device *in_dev;
1235         struct net *net = dev_net(dev);
1236         int master_idx;
1237
1238         rcu_read_lock();
1239         in_dev = __in_dev_get_rcu(dev);
1240         if (!in_dev)
1241                 goto no_in_dev;
1242
1243         for_primary_ifa(in_dev) {
1244                 if (ifa->ifa_scope > scope)
1245                         continue;
1246                 if (!dst || inet_ifa_match(dst, ifa)) {
1247                         addr = ifa->ifa_local;
1248                         break;
1249                 }
1250                 if (!addr)
1251                         addr = ifa->ifa_local;
1252         } endfor_ifa(in_dev);
1253
1254         if (addr)
1255                 goto out_unlock;
1256 no_in_dev:
1257         master_idx = l3mdev_master_ifindex_rcu(dev);
1258
1259         /* For VRFs, the VRF device takes the place of the loopback device,
1260          * with addresses on it being preferred.  Note in such cases the
1261          * loopback device will be among the devices that fail the master_idx
1262          * equality check in the loop below.
1263          */
1264         if (master_idx &&
1265             (dev = dev_get_by_index_rcu(net, master_idx)) &&
1266             (in_dev = __in_dev_get_rcu(dev))) {
1267                 addr = in_dev_select_addr(in_dev, scope);
1268                 if (addr)
1269                         goto out_unlock;
1270         }
1271
1272         /* Not loopback addresses on loopback should be preferred
1273            in this case. It is important that lo is the first interface
1274            in dev_base list.
1275          */
1276         for_each_netdev_rcu(net, dev) {
1277                 if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1278                         continue;
1279
1280                 in_dev = __in_dev_get_rcu(dev);
1281                 if (!in_dev)
1282                         continue;
1283
1284                 addr = in_dev_select_addr(in_dev, scope);
1285                 if (addr)
1286                         goto out_unlock;
1287         }
1288 out_unlock:
1289         rcu_read_unlock();
1290         return addr;
1291 }
1292 EXPORT_SYMBOL(inet_select_addr);
1293
/*
 * Single-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev tracking two things:
 *   addr - the first ifa_local whose scope is <= @scope and which equals
 *          @local (any address qualifies when @local is 0);
 *   same - whether some address' subnet matches @local (if non-zero) and
 *          @dst (if non-zero).
 *
 * Returns addr once both have been established, 0 otherwise.
 */
1294 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1295                               __be32 local, int scope)
1296 {
1297         int same = 0;
1298         __be32 addr = 0;
1299
1300         for_ifa(in_dev) {
                     /* Candidate source address: first one that fits. */
1301                 if (!addr &&
1302                     (local == ifa->ifa_local || !local) &&
1303                     ifa->ifa_scope <= scope) {
1304                         addr = ifa->ifa_local;
1305                         if (same)
1306                                 break;
1307                 }
1308                 if (!same) {
1309                         same = (!local || inet_ifa_match(local, ifa)) &&
1310                                 (!dst || inet_ifa_match(dst, ifa));
1311                         if (same && addr) {
                                     /* With an explicit local address, or with
                                      * no dst constraint, the candidate stands.
                                      */
1312                                 if (local || !dst)
1313                                         break;
1314                                 /* Is the selected addr into dst subnet? */
1315                                 if (inet_ifa_match(addr, ifa))
1316                                         break;
1317                                 /* No, then can we use new local src? */
1318                                 if (ifa->ifa_scope <= scope) {
1319                                         addr = ifa->ifa_local;
1320                                         break;
1321                                 }
1322                                 /* search for large dst subnet for addr */
1323                                 same = 0;
1324                         }
1325                 }
1326         } endfor_ifa(in_dev);
1327
1328         return same ? addr : 0;
1329 }
1330
1331 /*
1332  * Confirm that local IP address exists using wildcards:
1333  * - net: netns to check, cannot be NULL
1334  * - in_dev: only on this interface, NULL=any interface
1335  * - dst: only in the same subnet as dst, 0=any dst
1336  * - local: address, 0=autoselect the local address
1337  * - scope: maximum allowed scope value for the local address
1338  */
1339 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1340                          __be32 dst, __be32 local, int scope)
1341 {
1342         __be32 addr = 0;
1343         struct net_device *dev;
1344
1345         if (in_dev)
1346                 return confirm_addr_indev(in_dev, dst, local, scope);
1347
1348         rcu_read_lock();
1349         for_each_netdev_rcu(net, dev) {
1350                 in_dev = __in_dev_get_rcu(dev);
1351                 if (in_dev) {
1352                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1353                         if (addr)
1354                                 break;
1355                 }
1356         }
1357         rcu_read_unlock();
1358
1359         return addr;
1360 }
1361 EXPORT_SYMBOL(inet_confirm_addr);
1362
1363 /*
1364  *      Device notifier
1365  */
1366
/*
 * Add @nb to the blocking notifier chain inetaddr_chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
1367 int register_inetaddr_notifier(struct notifier_block *nb)
1368 {
1369         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1370 }
1371 EXPORT_SYMBOL(register_inetaddr_notifier);
1372
/*
 * Remove @nb from the blocking notifier chain inetaddr_chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
1373 int unregister_inetaddr_notifier(struct notifier_block *nb)
1374 {
1375         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1376 }
1377 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1378
/*
 * Add @nb to the blocking notifier chain inetaddr_validator_chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
1379 int register_inetaddr_validator_notifier(struct notifier_block *nb)
1380 {
1381         return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1382 }
1383 EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1384
/*
 * Remove @nb from the blocking notifier chain inetaddr_validator_chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
1385 int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1386 {
1387         return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1388             nb);
1389 }
1390 EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1391
1392 /* Rename ifa_labels for a device name change. Make some effort to preserve
1393  * existing alias numbering and to create unique labels if possible.
1394 */
1395 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1396 {
1397         struct in_ifaddr *ifa;
1398         int named = 0;
1399
1400         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1401                 char old[IFNAMSIZ], *dot;
1402
1403                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1404                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1405                 if (named++ == 0)
1406                         goto skip;
1407                 dot = strchr(old, ':');
1408                 if (!dot) {
1409                         sprintf(old, ":%d", named);
1410                         dot = old;
1411                 }
1412                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1413                         strcat(ifa->ifa_label, dot);
1414                 else
1415                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1416 skip:
1417                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1418         }
1419 }
1420
/*
 * Tell whether @mtu is large enough for IPv4 to run on a device.
 * The threshold is 68 bytes (presumably the RFC 791 minimum datagram size
 * every host must handle; confirm against the RFC).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1425
1426 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1427                                         struct in_device *in_dev)
1428
1429 {
1430         struct in_ifaddr *ifa;
1431
1432         for (ifa = in_dev->ifa_list; ifa;
1433              ifa = ifa->ifa_next) {
1434                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1435                          ifa->ifa_local, dev,
1436                          ifa->ifa_local, NULL,
1437                          dev->dev_addr, NULL);
1438         }
1439 }
1440
1441 /* Called only under RTNL semaphore */
     /*
      * Netdevice notifier callback for the IPv4 per-device state.
      *
      * If the device has no in_device yet, one is created on NETDEV_REGISTER
      * (or on NETDEV_CHANGEMTU once the MTU is valid) and nothing else is
      * done.  Otherwise the event is dispatched below.  Returns NOTIFY_DONE,
      * except when inetdev_init() fails during NETDEV_REGISTER, in which
      * case the error is returned through notifier_from_errno().
      */
1442
1443 static int inetdev_event(struct notifier_block *this, unsigned long event,
1444                          void *ptr)
1445 {
1446         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1447         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1448
1449         ASSERT_RTNL();
1450
1451         if (!in_dev) {
1452                 if (event == NETDEV_REGISTER) {
1453                         in_dev = inetdev_init(dev);
1454                         if (IS_ERR(in_dev))
1455                                 return notifier_from_errno(PTR_ERR(in_dev));
1456                         if (dev->flags & IFF_LOOPBACK) {
1457                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1458                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1459                         }
1460                 } else if (event == NETDEV_CHANGEMTU) {
1461                         /* Re-enabling IP */
                             /* NOTE(review): the result of inetdev_init() is
                              * not checked on this path.
                              */
1462                         if (inetdev_valid_mtu(dev->mtu))
1463                                 in_dev = inetdev_init(dev);
1464                 }
1465                 goto out;
1466         }
1467
1468         switch (event) {
1469         case NETDEV_REGISTER:
                     /* REGISTER for a device that already has an in_device is
                      * unexpected: log it and clear dev->ip_ptr.
                      */
1470                 pr_debug("%s: bug\n", __func__);
1471                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1472                 break;
1473         case NETDEV_UP:
1474                 if (!inetdev_valid_mtu(dev->mtu))
1475                         break;
                     /* Loopback devices get 127.0.0.1/8, host scope, when they
                      * come up.  Allocation failure is silently ignored.
                      */
1476                 if (dev->flags & IFF_LOOPBACK) {
1477                         struct in_ifaddr *ifa = inet_alloc_ifa();
1478
1479                         if (ifa) {
1480                                 INIT_HLIST_NODE(&ifa->hash);
1481                                 ifa->ifa_local =
1482                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1483                                 ifa->ifa_prefixlen = 8;
1484                                 ifa->ifa_mask = inet_make_mask(8);
1485                                 in_dev_hold(in_dev);
1486                                 ifa->ifa_dev = in_dev;
1487                                 ifa->ifa_scope = RT_SCOPE_HOST;
1488                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1489                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1490                                                  INFINITY_LIFE_TIME);
1491                                 ipv4_devconf_setall(in_dev);
1492                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1493                                 inet_insert_ifa(ifa);
1494                         }
1495                 }
1496                 ip_mc_up(in_dev);
1497                 /* fall through */
1498         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * arp_notify setting is enabled.
                      */
1499                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1500                         break;
1501                 /* fall through */
1502         case NETDEV_NOTIFY_PEERS:
1503                 /* Send gratuitous ARP to notify of link change */
1504                 inetdev_send_gratuitous_arp(dev, in_dev);
1505                 break;
1506         case NETDEV_DOWN:
1507                 ip_mc_down(in_dev);
1508                 break;
1509         case NETDEV_PRE_TYPE_CHANGE:
1510                 ip_mc_unmap(in_dev);
1511                 break;
1512         case NETDEV_POST_TYPE_CHANGE:
1513                 ip_mc_remap(in_dev);
1514                 break;
1515         case NETDEV_CHANGEMTU:
1516                 if (inetdev_valid_mtu(dev->mtu))
1517                         break;
1518                 /* disable IP when MTU is not enough */
                     /* fall through */
1519         case NETDEV_UNREGISTER:
1520                 inetdev_destroy(in_dev);
1521                 break;
1522         case NETDEV_CHANGENAME:
1523                 /* Do not notify about label change, this event is
1524                  * not interesting to applications using netlink.
1525                  */
1526                 inetdev_changename(dev, in_dev);
1527
                     /* Re-register the sysctl entries after the rename. */
1528                 devinet_sysctl_unregister(in_dev);
1529                 devinet_sysctl_register(in_dev);
1530                 break;
1531         }
1532 out:
1533         return NOTIFY_DONE;
1534 }
1535
/*
 * Notifier block whose callback is inetdev_event().
 * NOTE(review): presumably registered on the netdevice notifier chain
 * elsewhere in this file; the registration is not visible here.
 */
1536 static struct notifier_block ip_netdev_notifier = {
1537         .notifier_call = inetdev_event,
1538 };
1539
1540 static size_t inet_nlmsg_size(void)
1541 {
1542         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1543                + nla_total_size(4) /* IFA_ADDRESS */
1544                + nla_total_size(4) /* IFA_LOCAL */
1545                + nla_total_size(4) /* IFA_BROADCAST */
1546                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1547                + nla_total_size(4)  /* IFA_FLAGS */
1548                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1549 }
1550
1551 static inline u32 cstamp_delta(unsigned long cstamp)
1552 {
1553         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1554 }
1555
1556 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1557                          unsigned long tstamp, u32 preferred, u32 valid)
1558 {
1559         struct ifa_cacheinfo ci;
1560
1561         ci.cstamp = cstamp_delta(cstamp);
1562         ci.tstamp = cstamp_delta(tstamp);
1563         ci.ifa_prefered = preferred;
1564         ci.ifa_valid = valid;
1565
1566         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1567 }
1568
1569 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1570                             u32 portid, u32 seq, int event, unsigned int flags)
1571 {
1572         struct ifaddrmsg *ifm;
1573         struct nlmsghdr  *nlh;
1574         u32 preferred, valid;
1575
1576         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1577         if (!nlh)
1578                 return -EMSGSIZE;
1579
1580         ifm = nlmsg_data(nlh);
1581         ifm->ifa_family = AF_INET;
1582         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1583         ifm->ifa_flags = ifa->ifa_flags;
1584         ifm->ifa_scope = ifa->ifa_scope;
1585         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1586
1587         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1588                 preferred = ifa->ifa_preferred_lft;
1589                 valid = ifa->ifa_valid_lft;
1590                 if (preferred != INFINITY_LIFE_TIME) {
1591                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1592
1593                         if (preferred > tval)
1594                                 preferred -= tval;
1595                         else
1596                                 preferred = 0;
1597                         if (valid != INFINITY_LIFE_TIME) {
1598                                 if (valid > tval)
1599                                         valid -= tval;
1600                                 else
1601                                         valid = 0;
1602                         }
1603                 }
1604         } else {
1605                 preferred = INFINITY_LIFE_TIME;
1606                 valid = INFINITY_LIFE_TIME;
1607         }
1608         if ((ifa->ifa_address &&
1609              nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1610             (ifa->ifa_local &&
1611              nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1612             (ifa->ifa_broadcast &&
1613              nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1614             (ifa->ifa_label[0] &&
1615              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1616             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1617             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1618                           preferred, valid))
1619                 goto nla_put_failure;
1620
1621         nlmsg_end(skb, nlh);
1622         return 0;
1623
1624 nla_put_failure:
1625         nlmsg_cancel(skb, nlh);
1626         return -EMSGSIZE;
1627 }
1628
1629 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1630 {
1631         struct net *net = sock_net(skb->sk);
1632         int h, s_h;
1633         int idx, s_idx;
1634         int ip_idx, s_ip_idx;
1635         struct net_device *dev;
1636         struct in_device *in_dev;
1637         struct in_ifaddr *ifa;
1638         struct hlist_head *head;
1639
1640         s_h = cb->args[0];
1641         s_idx = idx = cb->args[1];
1642         s_ip_idx = ip_idx = cb->args[2];
1643
1644         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1645                 idx = 0;
1646                 head = &net->dev_index_head[h];
1647                 rcu_read_lock();
1648                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1649                           net->dev_base_seq;
1650                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1651                         if (idx < s_idx)
1652                                 goto cont;
1653                         if (h > s_h || idx > s_idx)
1654                                 s_ip_idx = 0;
1655                         in_dev = __in_dev_get_rcu(dev);
1656                         if (!in_dev)
1657                                 goto cont;
1658
1659                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1660                              ifa = ifa->ifa_next, ip_idx++) {
1661                                 if (ip_idx < s_ip_idx)
1662                                         continue;
1663                                 if (inet_fill_ifaddr(skb, ifa,
1664                                              NETLINK_CB(cb->skb).portid,
1665                                              cb->nlh->nlmsg_seq,
1666                                              RTM_NEWADDR, NLM_F_MULTI) < 0) {
1667                                         rcu_read_unlock();
1668                                         goto done;
1669                                 }
1670                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1671                         }
1672 cont:
1673                         idx++;
1674                 }
1675                 rcu_read_unlock();
1676         }
1677
1678 done:
1679         cb->args[0] = h;
1680         cb->args[1] = idx;
1681         cb->args[2] = ip_idx;
1682
1683         return skb->len;
1684 }
1685
1686 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1687                       u32 portid)
1688 {
1689         struct sk_buff *skb;
1690         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1691         int err = -ENOBUFS;
1692         struct net *net;
1693
1694         net = dev_net(ifa->ifa_dev->dev);
1695         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1696         if (!skb)
1697                 goto errout;
1698
1699         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1700         if (err < 0) {
1701                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1702                 WARN_ON(err == -EMSGSIZE);
1703                 kfree_skb(skb);
1704                 goto errout;
1705         }
1706         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1707         return;
1708 errout:
1709         if (err < 0)
1710                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1711 }
1712
1713 static size_t inet_get_link_af_size(const struct net_device *dev,
1714                                     u32 ext_filter_mask)
1715 {
1716         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1717
1718         if (!in_dev)
1719                 return 0;
1720
1721         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1722 }
1723
1724 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1725                              u32 ext_filter_mask)
1726 {
1727         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1728         struct nlattr *nla;
1729         int i;
1730
1731         if (!in_dev)
1732                 return -ENODATA;
1733
1734         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1735         if (!nla)
1736                 return -EMSGSIZE;
1737
1738         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1739                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1740
1741         return 0;
1742 }
1743
1744 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1745         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1746 };
1747
1748 static int inet_validate_link_af(const struct net_device *dev,
1749                                  const struct nlattr *nla)
1750 {
1751         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1752         int err, rem;
1753
1754         if (dev && !__in_dev_get_rtnl(dev))
1755                 return -EAFNOSUPPORT;
1756
1757         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
1758         if (err < 0)
1759                 return err;
1760
1761         if (tb[IFLA_INET_CONF]) {
1762                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1763                         int cfgid = nla_type(a);
1764
1765                         if (nla_len(a) < 4)
1766                                 return -EINVAL;
1767
1768                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1769                                 return -EINVAL;
1770                 }
1771         }
1772
1773         return 0;
1774 }
1775
1776 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1777 {
1778         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1779         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1780         int rem;
1781
1782         if (!in_dev)
1783                 return -EAFNOSUPPORT;
1784
1785         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1786                 BUG();
1787
1788         if (tb[IFLA_INET_CONF]) {
1789                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1790                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1791         }
1792
1793         return 0;
1794 }
1795
1796 static int inet_netconf_msgsize_devconf(int type)
1797 {
1798         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1799                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1800         bool all = false;
1801
1802         if (type == NETCONFA_ALL)
1803                 all = true;
1804
1805         if (all || type == NETCONFA_FORWARDING)
1806                 size += nla_total_size(4);
1807         if (all || type == NETCONFA_RP_FILTER)
1808                 size += nla_total_size(4);
1809         if (all || type == NETCONFA_MC_FORWARDING)
1810                 size += nla_total_size(4);
1811         if (all || type == NETCONFA_PROXY_NEIGH)
1812                 size += nla_total_size(4);
1813         if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
1814                 size += nla_total_size(4);
1815
1816         return size;
1817 }
1818
1819 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1820                                      struct ipv4_devconf *devconf, u32 portid,
1821                                      u32 seq, int event, unsigned int flags,
1822                                      int type)
1823 {
1824         struct nlmsghdr  *nlh;
1825         struct netconfmsg *ncm;
1826         bool all = false;
1827
1828         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1829                         flags);
1830         if (!nlh)
1831                 return -EMSGSIZE;
1832
1833         if (type == NETCONFA_ALL)
1834                 all = true;
1835
1836         ncm = nlmsg_data(nlh);
1837         ncm->ncm_family = AF_INET;
1838
1839         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1840                 goto nla_put_failure;
1841
1842         if (!devconf)
1843                 goto out;
1844
1845         if ((all || type == NETCONFA_FORWARDING) &&
1846             nla_put_s32(skb, NETCONFA_FORWARDING,
1847                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1848                 goto nla_put_failure;
1849         if ((all || type == NETCONFA_RP_FILTER) &&
1850             nla_put_s32(skb, NETCONFA_RP_FILTER,
1851                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1852                 goto nla_put_failure;
1853         if ((all || type == NETCONFA_MC_FORWARDING) &&
1854             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1855                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1856                 goto nla_put_failure;
1857         if ((all || type == NETCONFA_PROXY_NEIGH) &&
1858             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1859                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1860                 goto nla_put_failure;
1861         if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
1862             nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
1863                         IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
1864                 goto nla_put_failure;
1865
1866 out:
1867         nlmsg_end(skb, nlh);
1868         return 0;
1869
1870 nla_put_failure:
1871         nlmsg_cancel(skb, nlh);
1872         return -EMSGSIZE;
1873 }
1874
1875 void inet_netconf_notify_devconf(struct net *net, int event, int type,
1876                                  int ifindex, struct ipv4_devconf *devconf)
1877 {
1878         struct sk_buff *skb;
1879         int err = -ENOBUFS;
1880
1881         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
1882         if (!skb)
1883                 goto errout;
1884
1885         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1886                                         event, 0, type);
1887         if (err < 0) {
1888                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1889                 WARN_ON(err == -EMSGSIZE);
1890                 kfree_skb(skb);
1891                 goto errout;
1892         }
1893         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
1894         return;
1895 errout:
1896         if (err < 0)
1897                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1898 }
1899
1900 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1901         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1902         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1903         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1904         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1905         [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]  = { .len = sizeof(int) },
1906 };
1907
1908 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1909                                     struct nlmsghdr *nlh,
1910                                     struct netlink_ext_ack *extack)
1911 {
1912         struct net *net = sock_net(in_skb->sk);
1913         struct nlattr *tb[NETCONFA_MAX+1];
1914         struct netconfmsg *ncm;
1915         struct sk_buff *skb;
1916         struct ipv4_devconf *devconf;
1917         struct in_device *in_dev;
1918         struct net_device *dev;
1919         int ifindex;
1920         int err;
1921
1922         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1923                           devconf_ipv4_policy, extack);
1924         if (err < 0)
1925                 goto errout;
1926
1927         err = -EINVAL;
1928         if (!tb[NETCONFA_IFINDEX])
1929                 goto errout;
1930
1931         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1932         switch (ifindex) {
1933         case NETCONFA_IFINDEX_ALL:
1934                 devconf = net->ipv4.devconf_all;
1935                 break;
1936         case NETCONFA_IFINDEX_DEFAULT:
1937                 devconf = net->ipv4.devconf_dflt;
1938                 break;
1939         default:
1940                 dev = __dev_get_by_index(net, ifindex);
1941                 if (!dev)
1942                         goto errout;
1943                 in_dev = __in_dev_get_rtnl(dev);
1944                 if (!in_dev)
1945                         goto errout;
1946                 devconf = &in_dev->cnf;
1947                 break;
1948         }
1949
1950         err = -ENOBUFS;
1951         skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
1952         if (!skb)
1953                 goto errout;
1954
1955         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1956                                         NETLINK_CB(in_skb).portid,
1957                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1958                                         NETCONFA_ALL);
1959         if (err < 0) {
1960                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1961                 WARN_ON(err == -EMSGSIZE);
1962                 kfree_skb(skb);
1963                 goto errout;
1964         }
1965         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1966 errout:
1967         return err;
1968 }
1969
1970 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1971                                      struct netlink_callback *cb)
1972 {
1973         struct net *net = sock_net(skb->sk);
1974         int h, s_h;
1975         int idx, s_idx;
1976         struct net_device *dev;
1977         struct in_device *in_dev;
1978         struct hlist_head *head;
1979
1980         s_h = cb->args[0];
1981         s_idx = idx = cb->args[1];
1982
1983         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1984                 idx = 0;
1985                 head = &net->dev_index_head[h];
1986                 rcu_read_lock();
1987                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1988                           net->dev_base_seq;
1989                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1990                         if (idx < s_idx)
1991                                 goto cont;
1992                         in_dev = __in_dev_get_rcu(dev);
1993                         if (!in_dev)
1994                                 goto cont;
1995
1996                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1997                                                       &in_dev->cnf,
1998                                                       NETLINK_CB(cb->skb).portid,
1999                                                       cb->nlh->nlmsg_seq,
2000                                                       RTM_NEWNETCONF,
2001                                                       NLM_F_MULTI,
2002                                                       NETCONFA_ALL) < 0) {
2003                                 rcu_read_unlock();
2004                                 goto done;
2005                         }
2006                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2007 cont:
2008                         idx++;
2009                 }
2010                 rcu_read_unlock();
2011         }
2012         if (h == NETDEV_HASHENTRIES) {
2013                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2014                                               net->ipv4.devconf_all,
2015                                               NETLINK_CB(cb->skb).portid,
2016                                               cb->nlh->nlmsg_seq,
2017                                               RTM_NEWNETCONF, NLM_F_MULTI,
2018                                               NETCONFA_ALL) < 0)
2019                         goto done;
2020                 else
2021                         h++;
2022         }
2023         if (h == NETDEV_HASHENTRIES + 1) {
2024                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2025                                               net->ipv4.devconf_dflt,
2026                                               NETLINK_CB(cb->skb).portid,
2027                                               cb->nlh->nlmsg_seq,
2028                                               RTM_NEWNETCONF, NLM_F_MULTI,
2029                                               NETCONFA_ALL) < 0)
2030                         goto done;
2031                 else
2032                         h++;
2033         }
2034 done:
2035         cb->args[0] = h;
2036         cb->args[1] = idx;
2037
2038         return skb->len;
2039 }
2040
2041 #ifdef CONFIG_SYSCTL
2042
2043 static void devinet_copy_dflt_conf(struct net *net, int i)
2044 {
2045         struct net_device *dev;
2046
2047         rcu_read_lock();
2048         for_each_netdev_rcu(net, dev) {
2049                 struct in_device *in_dev;
2050
2051                 in_dev = __in_dev_get_rcu(dev);
2052                 if (in_dev && !test_bit(i, in_dev->cnf.state))
2053                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2054         }
2055         rcu_read_unlock();
2056 }
2057
2058 /* called with RTNL locked */
2059 static void inet_forward_change(struct net *net)
2060 {
2061         struct net_device *dev;
2062         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2063
2064         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2065         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2066         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2067                                     NETCONFA_FORWARDING,
2068                                     NETCONFA_IFINDEX_ALL,
2069                                     net->ipv4.devconf_all);
2070         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2071                                     NETCONFA_FORWARDING,
2072                                     NETCONFA_IFINDEX_DEFAULT,
2073                                     net->ipv4.devconf_dflt);
2074
2075         for_each_netdev(net, dev) {
2076                 struct in_device *in_dev;
2077
2078                 if (on)
2079                         dev_disable_lro(dev);
2080
2081                 in_dev = __in_dev_get_rtnl(dev);
2082                 if (in_dev) {
2083                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2084                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2085                                                     NETCONFA_FORWARDING,
2086                                                     dev->ifindex, &in_dev->cnf);
2087                 }
2088         }
2089 }
2090
2091 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2092 {
2093         if (cnf == net->ipv4.devconf_dflt)
2094                 return NETCONFA_IFINDEX_DEFAULT;
2095         else if (cnf == net->ipv4.devconf_all)
2096                 return NETCONFA_IFINDEX_ALL;
2097         else {
2098                 struct in_device *idev
2099                         = container_of(cnf, struct in_device, cnf);
2100                 return idev->dev->ifindex;
2101         }
2102 }
2103
2104 static int devinet_conf_proc(struct ctl_table *ctl, int write,
2105                              void __user *buffer,
2106                              size_t *lenp, loff_t *ppos)
2107 {
2108         int old_value = *(int *)ctl->data;
2109         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2110         int new_value = *(int *)ctl->data;
2111
2112         if (write) {
2113                 struct ipv4_devconf *cnf = ctl->extra1;
2114                 struct net *net = ctl->extra2;
2115                 int i = (int *)ctl->data - cnf->data;
2116                 int ifindex;
2117
2118                 set_bit(i, cnf->state);
2119
2120                 if (cnf == net->ipv4.devconf_dflt)
2121                         devinet_copy_dflt_conf(net, i);
2122                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2123                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2124                         if ((new_value == 0) && (old_value != 0))
2125                                 rt_cache_flush(net);
2126
2127                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2128                     new_value != old_value) {
2129                         ifindex = devinet_conf_ifindex(net, cnf);
2130                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2131                                                     NETCONFA_RP_FILTER,
2132                                                     ifindex, cnf);
2133                 }
2134                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2135                     new_value != old_value) {
2136                         ifindex = devinet_conf_ifindex(net, cnf);
2137                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2138                                                     NETCONFA_PROXY_NEIGH,
2139                                                     ifindex, cnf);
2140                 }
2141                 if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2142                     new_value != old_value) {
2143                         ifindex = devinet_conf_ifindex(net, cnf);
2144                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2145                                                     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2146                                                     ifindex, cnf);
2147                 }
2148         }
2149
2150         return ret;
2151 }
2152
2153 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2154                                   void __user *buffer,
2155                                   size_t *lenp, loff_t *ppos)
2156 {
2157         int *valp = ctl->data;
2158         int val = *valp;
2159         loff_t pos = *ppos;
2160         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2161
2162         if (write && *valp != val) {
2163                 struct net *net = ctl->extra2;
2164
2165                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2166                         if (!rtnl_trylock()) {
2167                                 /* Restore the original values before restarting */
2168                                 *valp = val;
2169                                 *ppos = pos;
2170                                 return restart_syscall();
2171                         }
2172                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2173                                 inet_forward_change(net);
2174                         } else {
2175                                 struct ipv4_devconf *cnf = ctl->extra1;
2176                                 struct in_device *idev =
2177                                         container_of(cnf, struct in_device, cnf);
2178                                 if (*valp)
2179                                         dev_disable_lro(idev->dev);
2180                                 inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2181                                                             NETCONFA_FORWARDING,
2182                                                             idev->dev->ifindex,
2183                                                             cnf);
2184                         }
2185                         rtnl_unlock();
2186                         rt_cache_flush(net);
2187                 } else
2188                         inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2189                                                     NETCONFA_FORWARDING,
2190                                                     NETCONFA_IFINDEX_DEFAULT,
2191                                                     net->ipv4.devconf_dflt);
2192         }
2193
2194         return ret;
2195 }
2196
2197 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2198                                 void __user *buffer,
2199                                 size_t *lenp, loff_t *ppos)
2200 {
2201         int *valp = ctl->data;
2202         int val = *valp;
2203         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2204         struct net *net = ctl->extra2;
2205
2206         if (write && *valp != val)
2207                 rt_cache_flush(net);
2208
2209         return ret;
2210 }
2211
/*
 * Initializer for one ctl_table entry exposing IPV4_DEVCONF_<attr> under the
 * file name @name with mode @mval and handler @proc.  .data and .extra1
 * refer to the ipv4_devconf template.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.mode		= mval,					\
		.maxlen		= sizeof(int),				\
		.proc_handler	= proc,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.extra1		= &ipv4_devconf,			\
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2234
2235 static struct devinet_sysctl_table {
2236         struct ctl_table_header *sysctl_header;
2237         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2238 } devinet_sysctl = {
2239         .devinet_vars = {
2240                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2241                                              devinet_sysctl_forward),
2242                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2243
2244                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2245                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2246                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2247                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2248                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2249                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2250                                         "accept_source_route"),
2251                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2252                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2253                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2254                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2255                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2256                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2257                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2258                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2259                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2260                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2261                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2262                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2263                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2264                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2265                                         "force_igmp_version"),
2266                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2267                                         "igmpv2_unsolicited_report_interval"),
2268                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2269                                         "igmpv3_unsolicited_report_interval"),
2270                 DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2271                                         "ignore_routes_with_linkdown"),
2272                 DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2273                                         "drop_gratuitous_arp"),
2274
2275                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2276                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2277                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2278                                               "promote_secondaries"),
2279                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2280                                               "route_localnet"),
2281                 DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2282                                               "drop_unicast_in_l2_multicast"),
2283         },
2284 };
2285
2286 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2287                                      int ifindex, struct ipv4_devconf *p)
2288 {
2289         int i;
2290         struct devinet_sysctl_table *t;
2291         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2292
2293         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2294         if (!t)
2295                 goto out;
2296
2297         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2298                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2299                 t->devinet_vars[i].extra1 = p;
2300                 t->devinet_vars[i].extra2 = net;
2301         }
2302
2303         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2304
2305         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2306         if (!t->sysctl_header)
2307                 goto free;
2308
2309         p->sysctl = t;
2310
2311         inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2312                                     ifindex, p);
2313         return 0;
2314
2315 free:
2316         kfree(t);
2317 out:
2318         return -ENOBUFS;
2319 }
2320
2321 static void __devinet_sysctl_unregister(struct net *net,
2322                                         struct ipv4_devconf *cnf, int ifindex)
2323 {
2324         struct devinet_sysctl_table *t = cnf->sysctl;
2325
2326         if (t) {
2327                 cnf->sysctl = NULL;
2328                 unregister_net_sysctl_table(t->sysctl_header);
2329                 kfree(t);
2330         }
2331
2332         inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2333 }
2334
2335 static int devinet_sysctl_register(struct in_device *idev)
2336 {
2337         int err;
2338
2339         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2340                 return -EINVAL;
2341
2342         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2343         if (err)
2344                 return err;
2345         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2346                                         idev->dev->ifindex, &idev->cnf);
2347         if (err)
2348                 neigh_sysctl_unregister(idev->arp_parms);
2349         return err;
2350 }
2351
2352 static void devinet_sysctl_unregister(struct in_device *idev)
2353 {
2354         struct net *net = dev_net(idev->dev);
2355
2356         __devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2357         neigh_sysctl_unregister(idev->arp_parms);
2358 }
2359
2360 static struct ctl_table ctl_forward_entry[] = {
2361         {
2362                 .procname       = "ip_forward",
2363                 .data           = &ipv4_devconf.data[
2364                                         IPV4_DEVCONF_FORWARDING - 1],
2365                 .maxlen         = sizeof(int),
2366                 .mode           = 0644,
2367                 .proc_handler   = devinet_sysctl_forward,
2368                 .extra1         = &ipv4_devconf,
2369                 .extra2         = &init_net,
2370         },
2371         { },
2372 };
2373 #endif
2374
2375 static __net_init int devinet_init_net(struct net *net)
2376 {
2377         int err;
2378         struct ipv4_devconf *all, *dflt;
2379 #ifdef CONFIG_SYSCTL
2380         struct ctl_table *tbl = ctl_forward_entry;
2381         struct ctl_table_header *forw_hdr;
2382 #endif
2383
2384         err = -ENOMEM;
2385         all = &ipv4_devconf;
2386         dflt = &ipv4_devconf_dflt;
2387
2388         if (!net_eq(net, &init_net)) {
2389                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2390                 if (!all)
2391                         goto err_alloc_all;
2392
2393                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2394                 if (!dflt)
2395                         goto err_alloc_dflt;
2396
2397 #ifdef CONFIG_SYSCTL
2398                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2399                 if (!tbl)
2400                         goto err_alloc_ctl;
2401
2402                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2403                 tbl[0].extra1 = all;
2404                 tbl[0].extra2 = net;
2405 #endif
2406         }
2407
2408 #ifdef CONFIG_SYSCTL
2409         err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2410         if (err < 0)
2411                 goto err_reg_all;
2412
2413         err = __devinet_sysctl_register(net, "default",
2414                                         NETCONFA_IFINDEX_DEFAULT, dflt);
2415         if (err < 0)
2416                 goto err_reg_dflt;
2417
2418         err = -ENOMEM;
2419         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2420         if (!forw_hdr)
2421                 goto err_reg_ctl;
2422         net->ipv4.forw_hdr = forw_hdr;
2423 #endif
2424
2425         net->ipv4.devconf_all = all;
2426         net->ipv4.devconf_dflt = dflt;
2427         return 0;
2428
2429 #ifdef CONFIG_SYSCTL
2430 err_reg_ctl:
2431         __devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2432 err_reg_dflt:
2433         __devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2434 err_reg_all:
2435         if (tbl != ctl_forward_entry)
2436                 kfree(tbl);
2437 err_alloc_ctl:
2438 #endif
2439         if (dflt != &ipv4_devconf_dflt)
2440                 kfree(dflt);
2441 err_alloc_dflt:
2442         if (all != &ipv4_devconf)
2443                 kfree(all);
2444 err_alloc_all:
2445         return err;
2446 }
2447
2448 static __net_exit void devinet_exit_net(struct net *net)
2449 {
2450 #ifdef CONFIG_SYSCTL
2451         struct ctl_table *tbl;
2452
2453         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2454         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2455         __devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2456                                     NETCONFA_IFINDEX_DEFAULT);
2457         __devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2458                                     NETCONFA_IFINDEX_ALL);
2459         kfree(tbl);
2460 #endif
2461         kfree(net->ipv4.devconf_dflt);
2462         kfree(net->ipv4.devconf_all);
2463 }
2464
2465 static __net_initdata struct pernet_operations devinet_ops = {
2466         .init = devinet_init_net,
2467         .exit = devinet_exit_net,
2468 };
2469
2470 static struct rtnl_af_ops inet_af_ops __read_mostly = {
2471         .family           = AF_INET,
2472         .fill_link_af     = inet_fill_link_af,
2473         .get_link_af_size = inet_get_link_af_size,
2474         .validate_link_af = inet_validate_link_af,
2475         .set_link_af      = inet_set_link_af,
2476 };
2477
2478 void __init devinet_init(void)
2479 {
2480         int i;
2481
2482         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2483                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2484
2485         register_pernet_subsys(&devinet_ops);
2486
2487         register_gifconf(PF_INET, inet_gifconf);
2488         register_netdevice_notifier(&ip_netdev_notifier);
2489
2490         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2491
2492         rtnl_af_register(&inet_af_ops);
2493
2494         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2495         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2496         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2497         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2498                       inet_netconf_dump_devconf, 0);
2499 }