rtnetlink: add rtnl_link_state check in rtnl_configure_link
[sfrench/cifs-2.6.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43
/* Debug plumbing: neigh_dbg() forwards to pr_debug() only when the message
 * level is at or below the compile-time NEIGH_DEBUG verbosity.
 */
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)              \
47 do {                                            \
48         if (level <= NEIGH_DEBUG)               \
49                 pr_debug(fmt, ##__VA_ARGS__);   \
50 } while (0)
51
/* Proxy-neighbour hash: PNEIGH_HASHMASK + 1 (16) buckets per table. */
52 #define PNEIGH_HASHMASK         0xF
53
/* Forward declarations for helpers defined later in this file. */
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56                            u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59                                     struct net_device *dev);
60
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64
65 /*
66    Neighbour hash table buckets are protected with rwlock tbl->lock.
67
68    - All the scans/updates to hash buckets MUST be made under this lock.
69    - NOTHING clever should be made under this lock: no callbacks
70      to protocol backends, no attempts to send something to network.
71      It will result in deadlocks, if backend/driver wants to use neighbour
72      cache.
73    - If the entry requires some non-trivial actions, increase
74      its reference count and release table lock.
75
76    Neighbour entries are protected:
77    - with reference count.
78    - with rwlock neigh->lock
79
80    Reference count prevents destruction.
81
82    neigh->lock mainly serializes ll address data and its validity state.
83    However, the same lock is used to protect other entry fields:
84     - timer
85     - resolution queue
86
87    Again, nothing clever shall be done under neigh->lock;
88    the most complicated procedure we allow is dev->hard_header.
89    It is assumed that dev->hard_header is simple and does
90    not make callbacks to neighbour tables.
91  */
92
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
94 {
95         kfree_skb(skb);
96         return -ENETDOWN;
97 }
98
/* Final teardown of an entry that has been unlinked from its table:
 * run the optional per-parms cleanup hook, broadcast RTM_DELNEIGH and a
 * netevent, then drop the table's reference (which may free the entry
 * via neigh_destroy()).
 */
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 {
101         if (neigh->parms->neigh_cleanup)
102                 neigh->parms->neigh_cleanup(neigh);
103
104         __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
105         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
106         neigh_release(neigh);
107 }
108
109 /*
110  * It is random distribution in the interval (1/2)*base...(3/2)*base.
111  * It corresponds to default IPv6 settings and is not overridable,
112  * because it is really reasonable choice.
113  */
114
/* Randomized reachability timeout, uniform over (base/2, 3*base/2).
 * This matches the default IPv6 behaviour and is deliberately not
 * tunable.  A zero base yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;
	return (base >> 1) + (prandom_u32() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
120
121
/* Try to unlink @n (found at chain position @np) from @tbl.  Succeeds
 * only when nobody else holds a reference and the entry's nud_state and
 * flags match neither of the @state/@flags "keep" masks.  On success the
 * entry is marked dead and released outside n->lock.  Caller must hold
 * tbl->lock for writing.  Returns true if the entry was removed.
 */
122 static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
123                       struct neighbour __rcu **np, struct neigh_table *tbl)
124 {
125         bool retval = false;
126
127         write_lock(&n->lock);
128         if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
129             !(n->flags & flags)) {
130                 struct neighbour *neigh;
131
132                 neigh = rcu_dereference_protected(n->next,
133                                                   lockdep_is_held(&tbl->lock));
134                 rcu_assign_pointer(*np, neigh);
135                 n->dead = 1;
136                 retval = true;
137         }
138         write_unlock(&n->lock);
139         if (retval)
140                 neigh_cleanup_and_release(n);
141         return retval;
142 }
143
/* Locate @ndel in its hash bucket and delete it via neigh_del() with
 * empty keep-masks (i.e. regardless of state/flags), provided no other
 * references exist.  Caller holds tbl->lock for writing.  Returns true
 * if the entry was unlinked, false if not found or still referenced.
 */
144 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
145 {
146         struct neigh_hash_table *nht;
147         void *pkey = ndel->primary_key;
148         u32 hash_val;
149         struct neighbour *n;
150         struct neighbour __rcu **np;
151
152         nht = rcu_dereference_protected(tbl->nht,
153                                         lockdep_is_held(&tbl->lock));
154         hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
155         hash_val = hash_val >> (32 - nht->hash_shift);
156
157         np = &nht->hash_buckets[hash_val];
158         while ((n = rcu_dereference_protected(*np,
159                                               lockdep_is_held(&tbl->lock)))) {
160                 if (n == ndel)
161                         return neigh_del(n, 0, 0, np, tbl);
162                 np = &n->next;
163         }
164         return false;
165 }
166
/* Synchronous garbage collection, invoked from neigh_alloc() when the
 * table is over its gc thresholds: scan every bucket and evict entries
 * that are unreferenced and neither NUD_PERMANENT nor externally
 * learned.  Returns 1 if at least one entry was freed, 0 otherwise.
 */
167 static int neigh_forced_gc(struct neigh_table *tbl)
168 {
169         int shrunk = 0;
170         int i;
171         struct neigh_hash_table *nht;
172
173         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
174
175         write_lock_bh(&tbl->lock);
176         nht = rcu_dereference_protected(tbl->nht,
177                                         lockdep_is_held(&tbl->lock));
178         for (i = 0; i < (1 << nht->hash_shift); i++) {
179                 struct neighbour *n;
180                 struct neighbour __rcu **np;
181
182                 np = &nht->hash_buckets[i];
183                 while ((n = rcu_dereference_protected(*np,
184                                         lockdep_is_held(&tbl->lock))) != NULL) {
185                         /* Neighbour record may be discarded if:
186                          * - nobody refers to it.
187                          * - it is not permanent
188                          */
189                         if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
190                                       tbl)) {
191                                 shrunk = 1;
192                                 continue;
193                         }
194                         np = &n->next;
195                 }
196         }
197
198         tbl->last_flush = jiffies;
199
200         write_unlock_bh(&tbl->lock);
201
202         return shrunk;
203 }
204
/* Arm the entry's timer for @when, taking a reference that the timer
 * handler (or neigh_del_timer()) is responsible for dropping.  Arming
 * while already pending indicates a state-machine bug, hence the loud
 * warning and stack dump.
 */
205 static void neigh_add_timer(struct neighbour *n, unsigned long when)
206 {
207         neigh_hold(n);
208         if (unlikely(mod_timer(&n->timer, when))) {
209                 printk("NEIGH: BUG, double timer add, state is %x\n",
210                        n->nud_state);
211                 dump_stack();
212         }
213 }
214
/* Cancel a pending timer and drop the reference it held.  Returns 1 only
 * when the timer was actually pending (state in NUD_IN_TIMER and
 * del_timer() won the race), 0 otherwise.
 */
215 static int neigh_del_timer(struct neighbour *n)
216 {
217         if ((n->nud_state & NUD_IN_TIMER) &&
218             del_timer(&n->timer)) {
219                 neigh_release(n);
220                 return 1;
221         }
222         return 0;
223 }
224
225 static void pneigh_queue_purge(struct sk_buff_head *list)
226 {
227         struct sk_buff *skb;
228
229         while ((skb = skb_dequeue(list)) != NULL) {
230                 dev_put(skb->dev);
231                 kfree_skb(skb);
232         }
233 }
234
/* Unlink every entry for @dev (all entries when @dev is NULL) from the
 * hash table.  Entries still referenced elsewhere cannot be freed yet:
 * they are neutered in place — queue purged, output blackholed, state
 * downgraded — so the final neigh_release() can finish the destruction.
 * Caller must hold tbl->lock for writing.
 */
235 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
236 {
237         int i;
238         struct neigh_hash_table *nht;
239
240         nht = rcu_dereference_protected(tbl->nht,
241                                         lockdep_is_held(&tbl->lock));
242
243         for (i = 0; i < (1 << nht->hash_shift); i++) {
244                 struct neighbour *n;
245                 struct neighbour __rcu **np = &nht->hash_buckets[i];
246
247                 while ((n = rcu_dereference_protected(*np,
248                                         lockdep_is_held(&tbl->lock))) != NULL) {
249                         if (dev && n->dev != dev) {
250                                 np = &n->next;
251                                 continue;
252                         }
253                         rcu_assign_pointer(*np,
254                                    rcu_dereference_protected(n->next,
255                                                 lockdep_is_held(&tbl->lock)));
256                         write_lock(&n->lock);
257                         neigh_del_timer(n);
258                         n->dead = 1;
259
260                         if (refcount_read(&n->refcnt) != 1) {
261                                 /* The most unpleasant situation.
262                                    We must destroy neighbour entry,
263                                    but someone still uses it.
264
265                                    The destroy will be delayed until
266                                    the last user releases us, but
267                                    we must kill timers etc. and move
268                                    it to safe state.
269                                  */
270                                 __skb_queue_purge(&n->arp_queue);
271                                 n->arp_queue_len_bytes = 0;
272                                 n->output = neigh_blackhole;
273                                 if (n->nud_state & NUD_VALID)
274                                         n->nud_state = NUD_NOARP;
275                                 else
276                                         n->nud_state = NUD_NONE;
277                                 neigh_dbg(2, "neigh %p is stray\n", n);
278                         }
279                         write_unlock(&n->lock);
280                         neigh_cleanup_and_release(n);
281                 }
282         }
283 }
284
/* Flush all cached entries for @dev under tbl->lock — used when the
 * device's state changes and its cached resolutions must be rebuilt.
 */
285 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
286 {
287         write_lock_bh(&tbl->lock);
288         neigh_flush_dev(tbl, dev);
289         write_unlock_bh(&tbl->lock);
290 }
291 EXPORT_SYMBOL(neigh_changeaddr);
292
/* Device is going down: flush its neighbour entries, then remove its
 * proxy entries.  Note the asymmetric locking — tbl->lock is taken here
 * but released inside pneigh_ifdown_and_unlock().  Finally stop the
 * proxy timer and drain the proxy queue.  Always returns 0.
 */
293 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
294 {
295         write_lock_bh(&tbl->lock);
296         neigh_flush_dev(tbl, dev);
297         pneigh_ifdown_and_unlock(tbl, dev);
298
299         del_timer_sync(&tbl->proxy_timer);
300         pneigh_queue_purge(&tbl->proxy_queue);
301         return 0;
302 }
303 EXPORT_SYMBOL(neigh_ifdown);
304
/* Allocate and minimally initialise a new entry for @tbl/@dev.  May run
 * synchronous GC first when the table is above gc_thresh2/gc_thresh3.
 * The entry is returned with refcnt 1 and dead = 1 (not yet hashed);
 * returns NULL on memory pressure or allocation failure, in which case
 * the optimistic tbl->entries bump is rolled back.
 */
305 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
306 {
307         struct neighbour *n = NULL;
308         unsigned long now = jiffies;
309         int entries;
310
311         entries = atomic_inc_return(&tbl->entries) - 1;
312         if (entries >= tbl->gc_thresh3 ||
313             (entries >= tbl->gc_thresh2 &&
314              time_after(now, tbl->last_flush + 5 * HZ))) {
315                 if (!neigh_forced_gc(tbl) &&
316                     entries >= tbl->gc_thresh3) {
317                         net_info_ratelimited("%s: neighbor table overflow!\n",
318                                              tbl->id);
319                         NEIGH_CACHE_STAT_INC(tbl, table_fulls);
320                         goto out_entries;
321                 }
322         }
323
324         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
325         if (!n)
326                 goto out_entries;
327
328         __skb_queue_head_init(&n->arp_queue);
329         rwlock_init(&n->lock);
330         seqlock_init(&n->ha_lock);
331         n->updated        = n->used = now;
332         n->nud_state      = NUD_NONE;
333         n->output         = neigh_blackhole;
334         seqlock_init(&n->hh.hh_lock);
335         n->parms          = neigh_parms_clone(&tbl->parms);
336         timer_setup(&n->timer, neigh_timer_handler, 0);
337
338         NEIGH_CACHE_STAT_INC(tbl, allocs);
339         n->tbl            = tbl;
340         refcount_set(&n->refcnt, 1);
        /* Not yet visible in the hash table; __neigh_create() clears this. */
341         n->dead           = 1;
342 out:
343         return n;
344
345 out_entries:
346         atomic_dec(&tbl->entries);
347         goto out;
348 }
349
350 static void neigh_get_hash_rnd(u32 *x)
351 {
352         *x = get_random_u32() | 1;
353 }
354
/* Allocate a hash-table descriptor with 2^shift zeroed buckets.  Small
 * bucket arrays come from kmalloc, larger ones from the page allocator;
 * neigh_hash_free_rcu() mirrors this split.  Each hash_rnd word gets a
 * random odd seed.  Returns NULL on allocation failure.
 */
355 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
356 {
357         size_t size = (1 << shift) * sizeof(struct neighbour *);
358         struct neigh_hash_table *ret;
359         struct neighbour __rcu **buckets;
360         int i;
361
362         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
363         if (!ret)
364                 return NULL;
365         if (size <= PAGE_SIZE)
366                 buckets = kzalloc(size, GFP_ATOMIC);
367         else
368                 buckets = (struct neighbour __rcu **)
369                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
370                                            get_order(size));
371         if (!buckets) {
372                 kfree(ret);
373                 return NULL;
374         }
375         ret->hash_buckets = buckets;
376         ret->hash_shift = shift;
377         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
378                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
379         return ret;
380 }
381
382 static void neigh_hash_free_rcu(struct rcu_head *head)
383 {
384         struct neigh_hash_table *nht = container_of(head,
385                                                     struct neigh_hash_table,
386                                                     rcu);
387         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
388         struct neighbour __rcu **buckets = nht->hash_buckets;
389
390         if (size <= PAGE_SIZE)
391                 kfree(buckets);
392         else
393                 free_pages((unsigned long)buckets, get_order(size));
394         kfree(nht);
395 }
396
/* Rehash the table into 2^new_shift buckets.  Runs with tbl->lock held
 * for writing; lookups are lockless RCU readers, so entries are moved
 * with rcu_assign_pointer() and the old bucket array is only freed after
 * a grace period.  On allocation failure the old table is returned
 * unchanged.
 */
397 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
398                                                 unsigned long new_shift)
399 {
400         unsigned int i, hash;
401         struct neigh_hash_table *new_nht, *old_nht;
402
403         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
404
405         old_nht = rcu_dereference_protected(tbl->nht,
406                                             lockdep_is_held(&tbl->lock));
407         new_nht = neigh_hash_alloc(new_shift);
408         if (!new_nht)
409                 return old_nht;
410
411         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
412                 struct neighbour *n, *next;
413
414                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
415                                                    lockdep_is_held(&tbl->lock));
416                      n != NULL;
417                      n = next) {
418                         hash = tbl->hash(n->primary_key, n->dev,
419                                          new_nht->hash_rnd);
420
421                         hash >>= (32 - new_nht->hash_shift);
422                         next = rcu_dereference_protected(n->next,
423                                                 lockdep_is_held(&tbl->lock));
424
425                         rcu_assign_pointer(n->next,
426                                            rcu_dereference_protected(
427                                                 new_nht->hash_buckets[hash],
428                                                 lockdep_is_held(&tbl->lock)));
429                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
430                 }
431         }
432
433         rcu_assign_pointer(tbl->nht, new_nht);
434         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
435         return new_nht;
436 }
437
/* RCU lookup of (@pkey, @dev) in @tbl.  Returns the entry with its
 * refcount raised, or NULL; an entry whose refcount already dropped to
 * zero is treated as absent.
 */
438 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
439                                struct net_device *dev)
440 {
441         struct neighbour *n;
442
443         NEIGH_CACHE_STAT_INC(tbl, lookups);
444
445         rcu_read_lock_bh();
446         n = __neigh_lookup_noref(tbl, pkey, dev);
447         if (n) {
448                 if (!refcount_inc_not_zero(&n->refcnt))
449                         n = NULL;
450                 NEIGH_CACHE_STAT_INC(tbl, hits);
451         }
452
453         rcu_read_unlock_bh();
454         return n;
455 }
456 EXPORT_SYMBOL(neigh_lookup);
457
/* Like neigh_lookup() but matches on (@net, @pkey) only, hashing with a
 * NULL device.  Returns a referenced entry or NULL.
 */
458 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
459                                      const void *pkey)
460 {
461         struct neighbour *n;
462         unsigned int key_len = tbl->key_len;
463         u32 hash_val;
464         struct neigh_hash_table *nht;
465
466         NEIGH_CACHE_STAT_INC(tbl, lookups);
467
468         rcu_read_lock_bh();
469         nht = rcu_dereference_bh(tbl->nht);
470         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
471
472         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
473              n != NULL;
474              n = rcu_dereference_bh(n->next)) {
475                 if (!memcmp(n->primary_key, pkey, key_len) &&
476                     net_eq(dev_net(n->dev), net)) {
477                         if (!refcount_inc_not_zero(&n->refcnt))
478                                 n = NULL;
479                         NEIGH_CACHE_STAT_INC(tbl, hits);
480                         break;
481                 }
482         }
483
484         rcu_read_unlock_bh();
485         return n;
486 }
487 EXPORT_SYMBOL(neigh_lookup_nodev);
488
/* Slow path of neighbour creation: allocate an entry for (@pkey, @dev),
 * run the protocol constructor, the device's ndo_neigh_construct and the
 * parms neigh_setup hook, then insert it under tbl->lock.  If a
 * concurrent insert won the race, the existing entry is returned and the
 * new one released.  With @want_ref the result carries an extra
 * reference.  Returns ERR_PTR() on failure.
 */
489 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
490                                  struct net_device *dev, bool want_ref)
491 {
492         u32 hash_val;
493         unsigned int key_len = tbl->key_len;
494         int error;
495         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
496         struct neigh_hash_table *nht;
497
498         if (!n) {
499                 rc = ERR_PTR(-ENOBUFS);
500                 goto out;
501         }
502
503         memcpy(n->primary_key, pkey, key_len);
504         n->dev = dev;
505         dev_hold(dev);
506
507         /* Protocol specific setup. */
508         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
509                 rc = ERR_PTR(error);
510                 goto out_neigh_release;
511         }
512
513         if (dev->netdev_ops->ndo_neigh_construct) {
514                 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
515                 if (error < 0) {
516                         rc = ERR_PTR(error);
517                         goto out_neigh_release;
518                 }
519         }
520
521         /* Device specific setup. */
522         if (n->parms->neigh_setup &&
523             (error = n->parms->neigh_setup(n)) < 0) {
524                 rc = ERR_PTR(error);
525                 goto out_neigh_release;
526         }
527
528         n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
529
530         write_lock_bh(&tbl->lock);
531         nht = rcu_dereference_protected(tbl->nht,
532                                         lockdep_is_held(&tbl->lock));
533
534         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
535                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
536
537         hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
538
        /* parms was marked dead while we were setting up; do not insert. */
539         if (n->parms->dead) {
540                 rc = ERR_PTR(-EINVAL);
541                 goto out_tbl_unlock;
542         }
543
        /* A concurrent creator may have inserted the same key already. */
544         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
545                                             lockdep_is_held(&tbl->lock));
546              n1 != NULL;
547              n1 = rcu_dereference_protected(n1->next,
548                         lockdep_is_held(&tbl->lock))) {
549                 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
550                         if (want_ref)
551                                 neigh_hold(n1);
552                         rc = n1;
553                         goto out_tbl_unlock;
554                 }
555         }
556
557         n->dead = 0;
558         if (want_ref)
559                 neigh_hold(n);
560         rcu_assign_pointer(n->next,
561                            rcu_dereference_protected(nht->hash_buckets[hash_val],
562                                                      lockdep_is_held(&tbl->lock)));
563         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
564         write_unlock_bh(&tbl->lock);
565         neigh_dbg(2, "neigh %p is created\n", n);
566         rc = n;
567 out:
568         return rc;
569 out_tbl_unlock:
570         write_unlock_bh(&tbl->lock);
571 out_neigh_release:
572         neigh_release(n);
573         goto out;
574 }
575 EXPORT_SYMBOL(__neigh_create);
576
577 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
578 {
579         u32 hash_val = *(u32 *)(pkey + key_len - 4);
580         hash_val ^= (hash_val >> 16);
581         hash_val ^= hash_val >> 8;
582         hash_val ^= hash_val >> 4;
583         hash_val &= PNEIGH_HASHMASK;
584         return hash_val;
585 }
586
587 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
588                                               struct net *net,
589                                               const void *pkey,
590                                               unsigned int key_len,
591                                               struct net_device *dev)
592 {
593         while (n) {
594                 if (!memcmp(n->key, pkey, key_len) &&
595                     net_eq(pneigh_net(n), net) &&
596                     (n->dev == dev || !n->dev))
597                         return n;
598                 n = n->next;
599         }
600         return NULL;
601 }
602
603 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
604                 struct net *net, const void *pkey, struct net_device *dev)
605 {
606         unsigned int key_len = tbl->key_len;
607         u32 hash_val = pneigh_hash(pkey, key_len);
608
609         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
610                                  net, pkey, key_len, dev);
611 }
612 EXPORT_SYMBOL_GPL(__pneigh_lookup);
613
/* Look up a proxy entry for (@net, @pkey, @dev); with @creat set,
 * allocate and insert one under RTNL when not found.  Returns NULL on a
 * plain miss (!creat), allocation failure, or pconstructor failure.
 */
614 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
615                                     struct net *net, const void *pkey,
616                                     struct net_device *dev, int creat)
617 {
618         struct pneigh_entry *n;
619         unsigned int key_len = tbl->key_len;
620         u32 hash_val = pneigh_hash(pkey, key_len);
621
622         read_lock_bh(&tbl->lock);
623         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
624                               net, pkey, key_len, dev);
625         read_unlock_bh(&tbl->lock);
626
627         if (n || !creat)
628                 goto out;
629
630         ASSERT_RTNL();
631
632         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
633         if (!n)
634                 goto out;
635
636         write_pnet(&n->net, net);
637         memcpy(n->key, pkey, key_len);
638         n->dev = dev;
639         if (dev)
640                 dev_hold(dev);
641
642         if (tbl->pconstructor && tbl->pconstructor(n)) {
643                 if (dev)
644                         dev_put(dev);
645                 kfree(n);
646                 n = NULL;
647                 goto out;
648         }
649
650         write_lock_bh(&tbl->lock);
651         n->next = tbl->phash_buckets[hash_val];
652         tbl->phash_buckets[hash_val] = n;
653         write_unlock_bh(&tbl->lock);
654 out:
655         return n;
656 }
657 EXPORT_SYMBOL(pneigh_lookup);
658
659
/* Remove and free the proxy entry exactly matching (@net, @pkey, @dev).
 * The destructor and kfree run after tbl->lock is dropped.  Returns 0 on
 * success, -ENOENT when no exact match exists.
 */
660 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
661                   struct net_device *dev)
662 {
663         struct pneigh_entry *n, **np;
664         unsigned int key_len = tbl->key_len;
665         u32 hash_val = pneigh_hash(pkey, key_len);
666
667         write_lock_bh(&tbl->lock);
668         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
669              np = &n->next) {
670                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
671                     net_eq(pneigh_net(n), net)) {
672                         *np = n->next;
673                         write_unlock_bh(&tbl->lock);
674                         if (tbl->pdestructor)
675                                 tbl->pdestructor(n);
676                         if (n->dev)
677                                 dev_put(n->dev);
678                         kfree(n);
679                         return 0;
680                 }
681         }
682         write_unlock_bh(&tbl->lock);
683         return -ENOENT;
684 }
685
/* Called with tbl->lock write-held (see neigh_ifdown()): unlink every
 * proxy entry for @dev (all entries when @dev is NULL) onto a private
 * freelist, drop the lock, then destroy the harvested entries outside
 * it so pdestructor can sleep-safe work without holding tbl->lock.
 */
686 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
687                                     struct net_device *dev)
688 {
689         struct pneigh_entry *n, **np, *freelist = NULL;
690         u32 h;
691
692         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
693                 np = &tbl->phash_buckets[h];
694                 while ((n = *np) != NULL) {
695                         if (!dev || n->dev == dev) {
696                                 *np = n->next;
697                                 n->next = freelist;
698                                 freelist = n;
699                                 continue;
700                         }
701                         np = &n->next;
702                 }
703         }
704         write_unlock_bh(&tbl->lock);
705         while ((n = freelist)) {
706                 freelist = n->next;
707                 n->next = NULL;
708                 if (tbl->pdestructor)
709                         tbl->pdestructor(n);
710                 if (n->dev)
711                         dev_put(n->dev);
712                 kfree(n);
713         }
714         return -ENOENT;
715 }
716
717 static void neigh_parms_destroy(struct neigh_parms *parms);
718
/* Drop one reference to a parms block; the last put destroys it. */
719 static inline void neigh_parms_put(struct neigh_parms *parms)
720 {
721         if (refcount_dec_and_test(&parms->refcnt))
722                 neigh_parms_destroy(parms);
723 }
724
/*
 * Final destruction of a neighbour entry.  The entry must already be
 * unlinked from its table (dead) with no remaining references; a live
 * entry is reported and leaked rather than corrupting the table.
 * Purges the arp_queue, runs the device's ndo_neigh_destroy, releases
 * the device and parms references, and frees the entry via RCU.
 */
729 void neigh_destroy(struct neighbour *neigh)
730 {
731         struct net_device *dev = neigh->dev;
732
733         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
734
735         if (!neigh->dead) {
736                 pr_warn("Destroying alive neighbour %p\n", neigh);
737                 dump_stack();
738                 return;
739         }
740
741         if (neigh_del_timer(neigh))
742                 pr_warn("Impossible event\n");
743
744         write_lock_bh(&neigh->lock);
745         __skb_queue_purge(&neigh->arp_queue);
746         write_unlock_bh(&neigh->lock);
747         neigh->arp_queue_len_bytes = 0;
748
749         if (dev->netdev_ops->ndo_neigh_destroy)
750                 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
751
752         dev_put(dev);
753         neigh_parms_put(neigh->parms);
754
755         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
756
757         atomic_dec(&neigh->tbl->entries);
758         kfree_rcu(neigh, rcu);
759 }
760 EXPORT_SYMBOL(neigh_destroy);
761
762 /* Neighbour state is suspicious;
763    disable fast path.
764
765    Called with write_locked neigh.
766  */
767 static void neigh_suspect(struct neighbour *neigh)
768 {
769         neigh_dbg(2, "neigh %p is suspected\n", neigh);
770
771         neigh->output = neigh->ops->output;
772 }
773
774 /* Neighbour state is OK;
775    enable fast path.
776
777    Called with write_locked neigh.
778  */
779 static void neigh_connect(struct neighbour *neigh)
780 {
781         neigh_dbg(2, "neigh %p is connected\n", neigh);
782
783         neigh->output = neigh->ops->connected_output;
784 }
785
786 static void neigh_periodic_work(struct work_struct *work)
787 {
788         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
789         struct neighbour *n;
790         struct neighbour __rcu **np;
791         unsigned int i;
792         struct neigh_hash_table *nht;
793
794         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
795
796         write_lock_bh(&tbl->lock);
797         nht = rcu_dereference_protected(tbl->nht,
798                                         lockdep_is_held(&tbl->lock));
799
800         /*
801          *      periodically recompute ReachableTime from random function
802          */
803
804         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
805                 struct neigh_parms *p;
806                 tbl->last_rand = jiffies;
807                 list_for_each_entry(p, &tbl->parms_list, list)
808                         p->reachable_time =
809                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
810         }
811
812         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
813                 goto out;
814
815         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
816                 np = &nht->hash_buckets[i];
817
818                 while ((n = rcu_dereference_protected(*np,
819                                 lockdep_is_held(&tbl->lock))) != NULL) {
820                         unsigned int state;
821
822                         write_lock(&n->lock);
823
824                         state = n->nud_state;
825                         if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
826                             (n->flags & NTF_EXT_LEARNED)) {
827                                 write_unlock(&n->lock);
828                                 goto next_elt;
829                         }
830
831                         if (time_before(n->used, n->confirmed))
832                                 n->used = n->confirmed;
833
834                         if (refcount_read(&n->refcnt) == 1 &&
835                             (state == NUD_FAILED ||
836                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
837                                 *np = n->next;
838                                 n->dead = 1;
839                                 write_unlock(&n->lock);
840                                 neigh_cleanup_and_release(n);
841                                 continue;
842                         }
843                         write_unlock(&n->lock);
844
845 next_elt:
846                         np = &n->next;
847                 }
848                 /*
849                  * It's fine to release lock here, even if hash table
850                  * grows while we are preempted.
851                  */
852                 write_unlock_bh(&tbl->lock);
853                 cond_resched();
854                 write_lock_bh(&tbl->lock);
855                 nht = rcu_dereference_protected(tbl->nht,
856                                                 lockdep_is_held(&tbl->lock));
857         }
858 out:
859         /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
860          * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
861          * BASE_REACHABLE_TIME.
862          */
863         queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
864                               NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
865         write_unlock_bh(&tbl->lock);
866 }
867
868 static __inline__ int neigh_max_probes(struct neighbour *n)
869 {
870         struct neigh_parms *p = n->parms;
871         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
872                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
873                 NEIGH_VAR(p, MCAST_PROBES));
874 }
875
/* Mark a failed resolution: bump the res_failed statistic, report
 * unreachability for every skb queued on the entry, and flush the queue.
 *
 * Called with neigh->lock held for writing; the lock is dropped around
 * each ->error_report() call, hence the __releases/__acquires sparse
 * annotations.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	/* Drop anything still queued (e.g. if nud_state changed while the
	 * lock was released above) and reset the byte accounting.
	 */
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
900
/* Send one solicitation for @neigh and count it in neigh->probes.
 *
 * Called with neigh->lock held for writing; the lock is released before
 * ->solicit() runs (__releases annotation) and is NOT re-taken — the
 * caller must not touch the entry under the old lock afterwards.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	/* Free the clone (or NULL, which is a no-op). */
	kfree_skb(skb);
}
914
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine: REACHABLE may decay to DELAY or STALE,
 * DELAY either recovers to REACHABLE or escalates to PROBE, and
 * INCOMPLETE/PROBE entries that exhaust neigh_max_probes() are failed.
 * The timer is re-armed while the entry stays in a NUD_IN_TIMER state.
 */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	/* Default re-arm interval; refined per-state below. */
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed recently enough: stay REACHABLE. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Recently used but unconfirmed: verify via DELAY. */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			/* Idle and unconfirmed: demote to STALE. */
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* No confirmation: start active probing. */
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp to at least HZ/2 in the future before re-arming. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		/* mod_timer() returning 0 means the timer was inactive, so
		 * take an extra reference for the newly pending timer.
		 */
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	/* Drop the reference held by the expired timer. */
	neigh_release(neigh);
}
1002
/* Kick off (or continue) resolution for @neigh on behalf of @skb.
 *
 * Returns 0 if the entry is usable and the caller may transmit @skb
 * immediately, 1 if the skb was consumed (queued on arp_queue awaiting
 * resolution, or freed because resolution failed / the entry is dead).
 *
 * Takes neigh->lock with BH disabled; note the asymmetric unlock on the
 * success path — neigh_probe() releases the write lock itself, so only
 * local_bh_enable() remains for us.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Already resolved or being verified: nothing to do. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the unicast probe
			 * budget and arm the retransmit timer.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No probing configured at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale entry: reuse it but schedule verification. */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Queue the skb, evicting oldest packets while the
			 * byte budget (QUEUE_LEN_BYTES) would be exceeded.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1080
1081 static void neigh_update_hhs(struct neighbour *neigh)
1082 {
1083         struct hh_cache *hh;
1084         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1085                 = NULL;
1086
1087         if (neigh->dev->header_ops)
1088                 update = neigh->dev->header_ops->cache_update;
1089
1090         if (update) {
1091                 hh = &neigh->hh;
1092                 if (hh->hh_len) {
1093                         write_seqlock_bh(&hh->hh_lock);
1094                         update(hh, neigh->dev, neigh->ha);
1095                         write_sequnlock_bh(&hh->hh_lock);
1096                 }
1097         }
1098 }
1099
1100
1101
1102 /* Generic update routine.
1103    -- lladdr is new lladdr or NULL, if it is not supplied.
1104    -- new    is new state.
1105    -- flags
1106         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1107                                 if it is different.
1108         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1109                                 lladdr instead of overriding it
1110                                 if it is different.
1111         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1112
1113         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1114                                 NTF_ROUTER flag.
1115         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1116                                 a router.
1117
1118    Caller MUST hold reference count on the entry.
1119  */
1120
/* See the block comment above for the flag semantics.  Returns 0 on
 * success, -EPERM when a non-admin update targets a NOARP/PERMANENT
 * entry, -EINVAL when no lladdr is available.  Emits a netlink/netevent
 * notification (neigh_update_notify) when state or address changed.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch NOARP/PERMANENT entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead)
		goto out;

	neigh_update_ext_learned(neigh, flags, &notify);

	/* Transition to an invalid state (FAILED/NONE/...): tear down. */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Keep the old address but mark it suspect. */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamps only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha) {
		if (new & NUD_CONNECTED)
			neigh->confirmed = jiffies;
		neigh->updated = jiffies;
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* Publish the new address under the seqlock, then refresh
		 * cached hardware headers derived from it.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Backdate 'confirmed' so a non-connected entry with a new
		 * address is immediately eligible for re-verification.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1292
1293 /* Update the neigh to listen temporarily for probe responses, even if it is
1294  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1295  */
1296 void __neigh_set_probe_once(struct neighbour *neigh)
1297 {
1298         if (neigh->dead)
1299                 return;
1300         neigh->updated = jiffies;
1301         if (!(neigh->nud_state & NUD_FAILED))
1302                 return;
1303         neigh->nud_state = NUD_INCOMPLETE;
1304         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1305         neigh_add_timer(neigh,
1306                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1307 }
1308 EXPORT_SYMBOL(__neigh_set_probe_once);
1309
1310 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1311                                  u8 *lladdr, void *saddr,
1312                                  struct net_device *dev)
1313 {
1314         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1315                                                  lladdr || !dev->addr_len);
1316         if (neigh)
1317                 neigh_update(neigh, lladdr, NUD_STALE,
1318                              NEIGH_UPDATE_F_OVERRIDE, 0);
1319         return neigh;
1320 }
1321 EXPORT_SYMBOL(neigh_event_ns);
1322
1323 /* called with read_lock_bh(&n->lock); */
1324 static void neigh_hh_init(struct neighbour *n)
1325 {
1326         struct net_device *dev = n->dev;
1327         __be16 prot = n->tbl->protocol;
1328         struct hh_cache *hh = &n->hh;
1329
1330         write_lock_bh(&n->lock);
1331
1332         /* Only one thread can come in here and initialize the
1333          * hh_cache entry.
1334          */
1335         if (!hh->hh_len)
1336                 dev->header_ops->cache(n, hh, prot);
1337
1338         write_unlock_bh(&n->lock);
1339 }
1340
/* Slow and careful. */

/* Output path for entries that may still need resolution: trigger/continue
 * resolution via neigh_event_send(); once the entry is usable, build the
 * link-layer header (retrying under the ha_lock seqlock so a concurrent
 * address change cannot produce a torn header) and transmit.
 *
 * Returns 0 if the skb was queued for resolution, the dev_queue_xmit()
 * result on transmit, or -EINVAL if header construction failed (skb freed).
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		/* Lazily initialize the cached hardware header. */
		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1375
1376 /* As fast as possible without hh cache */
1377
1378 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1379 {
1380         struct net_device *dev = neigh->dev;
1381         unsigned int seq;
1382         int err;
1383
1384         do {
1385                 __skb_pull(skb, skb_network_offset(skb));
1386                 seq = read_seqbegin(&neigh->ha_lock);
1387                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1388                                       neigh->ha, NULL, skb->len);
1389         } while (read_seqretry(&neigh->ha_lock, seq));
1390
1391         if (err >= 0)
1392                 err = dev_queue_xmit(skb);
1393         else {
1394                 err = -EINVAL;
1395                 kfree_skb(skb);
1396         }
1397         return err;
1398 }
1399 EXPORT_SYMBOL(neigh_connected_output);
1400
/* Output path for devices that need no link-layer header: hand the skb
 * straight to the qdisc layer.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1406
/* Proxy-queue timer: walk tbl->proxy_queue, replay every skb whose
 * scheduled time has arrived via tbl->proxy_redo (or drop it if the
 * device went down), and re-arm the timer for the earliest remaining
 * entry.  Runs in timer (softirq) context under the proxy queue lock.
 */
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* <= 0 means this skb's scheduled time has passed. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1440
/* Queue @skb for delayed proxy processing, with a random delay of up to
 * PROXY_DELAY jiffies (jitter avoids synchronized proxy replies).  Drops
 * the skb outright if the queue already exceeds PROXY_QLEN.  Holds a
 * device reference per queued skb; released in neigh_proxy_process().
 */
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	/* If the timer is already pending for an earlier deadline, keep
	 * that earlier expiry instead of pushing it back.
	 */
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
1469
1470 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1471                                                       struct net *net, int ifindex)
1472 {
1473         struct neigh_parms *p;
1474
1475         list_for_each_entry(p, &tbl->parms_list, list) {
1476                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1477                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1478                         return p;
1479         }
1480
1481         return NULL;
1482 }
1483
1484 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1485                                       struct neigh_table *tbl)
1486 {
1487         struct neigh_parms *p;
1488         struct net *net = dev_net(dev);
1489         const struct net_device_ops *ops = dev->netdev_ops;
1490
1491         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1492         if (p) {
1493                 p->tbl            = tbl;
1494                 refcount_set(&p->refcnt, 1);
1495                 p->reachable_time =
1496                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1497                 dev_hold(dev);
1498                 p->dev = dev;
1499                 write_pnet(&p->net, net);
1500                 p->sysctl_table = NULL;
1501
1502                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1503                         dev_put(dev);
1504                         kfree(p);
1505                         return NULL;
1506                 }
1507
1508                 write_lock_bh(&tbl->lock);
1509                 list_add(&p->list, &tbl->parms.list);
1510                 write_unlock_bh(&tbl->lock);
1511
1512                 neigh_parms_data_state_cleanall(p);
1513         }
1514         return p;
1515 }
1516 EXPORT_SYMBOL(neigh_parms_alloc);
1517
/* RCU callback: drop the list's reference on the parms once all readers
 * from before neigh_parms_release() have finished.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1525
/* Unlink @parms from @tbl and release it.  The table's default parms are
 * never released this way.  The final put is deferred through RCU so
 * lockless readers that found the entry before it was unlinked stay safe.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	/* Drop the device reference taken in neigh_parms_alloc(). */
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
1539
/* Final destructor, reached when the last neigh_parms_put() drops the
 * refcount to zero.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1544
1545 static struct lock_class_key neigh_table_proxy_queue_class;
1546
1547 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1548
/* Initialize neighbour table @tbl and register it in neigh_tables[@index].
 * Called once at (module) init time; allocation failures here are fatal
 * (panic), matching other boot-time networking setup.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	/* Randomize to desynchronize reachability timers across entries. */
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	/* Periodic garbage collection; deferrable to save power. */
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand  = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1599
/* Tear down neighbour table @tbl: unregister it, stop GC work and the
 * proxy timer, drop all entries, and free the hashes and statistics.
 * Logs a critical message if entries leak.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* Free the hash table after outstanding RCU readers are done. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1626
1627 static struct neigh_table *neigh_find_table(int family)
1628 {
1629         struct neigh_table *tbl = NULL;
1630
1631         switch (family) {
1632         case AF_INET:
1633                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1634                 break;
1635         case AF_INET6:
1636                 tbl = neigh_tables[NEIGH_ND_TABLE];
1637                 break;
1638         case AF_DECnet:
1639                 tbl = neigh_tables[NEIGH_DN_TABLE];
1640                 break;
1641         }
1642
1643         return tbl;
1644 }
1645
/* RTM_DELNEIGH handler: delete a (proxy) neighbour entry described by the
 * netlink message.  Runs under RTNL.  For regular entries the deletion is
 * done by forcing the entry to NUD_FAILED via neigh_update() and then
 * unlinking it from the hash under the table lock.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	/* The destination attribute must carry a full protocol address. */
	if (nla_len(dst_attr) < (int)tbl->key_len)
		goto out;

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = neigh_update(neigh, NULL, NUD_FAILED,
			   NEIGH_UPDATE_F_OVERRIDE |
			   NEIGH_UPDATE_F_ADMIN,
			   NETLINK_CB(skb).portid);
	/* Drop the lookup reference and unlink the entry atomically with
	 * respect to the table lock.
	 */
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
1707
/* RTM_NEWNEIGH handler: create or update a neighbour or proxy entry.
 * Creation requires NLM_F_CREATE; NLM_F_EXCL rejects existing entries;
 * only NLM_F_REPLACE may override an already-known link-layer address.
 * Runs under RTNL (see ASSERT_RTNL below).
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
                     struct netlink_ext_ack *extack)
{
        int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        struct neighbour *neigh;
        void *dst, *lladdr;
        int err;

        ASSERT_RTNL();
        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
        if (err < 0)
                goto out;

        err = -EINVAL;
        if (tb[NDA_DST] == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }

                /* If a link-layer address is given it must be at least
                 * as long as this device's address length.
                 */
                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
                        goto out;
        }

        tbl = neigh_find_table(ndm->ndm_family);
        if (tbl == NULL)
                return -EAFNOSUPPORT;

        /* The key must cover the table's full key length. */
        if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
                goto out;
        dst = nla_data(tb[NDA_DST]);
        lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

        /* Proxy entries are created/updated in the separate pneigh hash. */
        if (ndm->ndm_flags & NTF_PROXY) {
                struct pneigh_entry *pn;

                err = -ENOBUFS;
                pn = pneigh_lookup(tbl, net, dst, dev, 1);
                if (pn) {
                        pn->flags = ndm->ndm_flags;
                        err = 0;
                }
                goto out;
        }

        if (dev == NULL)
                goto out;

        neigh = neigh_lookup(tbl, dst, dev);
        if (neigh == NULL) {
                /* Entry does not exist: only create with NLM_F_CREATE. */
                if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
                        err = -ENOENT;
                        goto out;
                }

                neigh = __neigh_lookup_errno(tbl, dst, dev);
                if (IS_ERR(neigh)) {
                        err = PTR_ERR(neigh);
                        goto out;
                }
        } else {
                if (nlh->nlmsg_flags & NLM_F_EXCL) {
                        err = -EEXIST;
                        neigh_release(neigh);
                        goto out;
                }

                /* Without NLM_F_REPLACE do not override existing state. */
                if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
                        flags &= ~NEIGH_UPDATE_F_OVERRIDE;
        }

        if (ndm->ndm_flags & NTF_EXT_LEARNED)
                flags |= NEIGH_UPDATE_F_EXT_LEARNED;

        if (ndm->ndm_flags & NTF_USE) {
                /* NTF_USE: just trigger resolution, no state change here. */
                neigh_event_send(neigh, NULL);
                err = 0;
        } else
                err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
                                   NETLINK_CB(skb).portid);
        neigh_release(neigh);

out:
        return err;
}
1803
/* Emit one NDTA_PARMS nested attribute describing @parms.
 * Returns the nest end offset on success, -ENOBUFS/-EMSGSIZE on failure.
 * Callers in this file hold tbl->lock (read) while filling.
 */
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
        struct nlattr *nest;

        nest = nla_nest_start(skb, NDTA_PARMS);
        if (nest == NULL)
                return -ENOBUFS;

        if ((parms->dev &&
             nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
            nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
            nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
                        NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
            /* approximative value for deprecated QUEUE_LEN (in packets) */
            nla_put_u32(skb, NDTPA_QUEUE_LEN,
                        NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
            nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
            nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
            nla_put_u32(skb, NDTPA_UCAST_PROBES,
                        NEIGH_VAR(parms, UCAST_PROBES)) ||
            nla_put_u32(skb, NDTPA_MCAST_PROBES,
                        NEIGH_VAR(parms, MCAST_PROBES)) ||
            nla_put_u32(skb, NDTPA_MCAST_REPROBES,
                        NEIGH_VAR(parms, MCAST_REPROBES)) ||
            nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
                          NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
                          NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_GC_STALETIME,
                          NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
                          NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_RETRANS_TIME,
                          NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
                          NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_PROXY_DELAY,
                          NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
            nla_put_msecs(skb, NDTPA_LOCKTIME,
                          NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
                goto nla_put_failure;
        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -EMSGSIZE;
}
1851
/* Emit a complete RTM_NEWNEIGHTBL message for @tbl: name, gc settings,
 * NDTA_CONFIG snapshot, aggregated NDTA_STATS and the table's default
 * NDTA_PARMS.  tbl->lock is held (read, BH off) for the duration so the
 * values form a consistent snapshot.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
                              u32 pid, u32 seq, int type, int flags)
{
        struct nlmsghdr *nlh;
        struct ndtmsg *ndtmsg;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndtmsg = nlmsg_data(nlh);

        read_lock_bh(&tbl->lock);
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;

        if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
            nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
            nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
            nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
            nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
                goto nla_put_failure;
        /* NDTA_CONFIG: static table configuration plus hash parameters
         * read under RCU from the current hash table.
         */
        {
                unsigned long now = jiffies;
                unsigned int flush_delta = now - tbl->last_flush;
                unsigned int rand_delta = now - tbl->last_rand;
                struct neigh_hash_table *nht;
                struct ndt_config ndc = {
                        .ndtc_key_len           = tbl->key_len,
                        .ndtc_entry_size        = tbl->entry_size,
                        .ndtc_entries           = atomic_read(&tbl->entries),
                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
                };

                rcu_read_lock_bh();
                nht = rcu_dereference_bh(tbl->nht);
                ndc.ndtc_hash_rnd = nht->hash_rnd[0];
                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
                rcu_read_unlock_bh();

                if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
                        goto nla_put_failure;
        }

        /* NDTA_STATS: sum the per-CPU statistics counters. */
        {
                int cpu;
                struct ndt_stats ndst;

                memset(&ndst, 0, sizeof(ndst));

                for_each_possible_cpu(cpu) {
                        struct neigh_statistics *st;

                        st = per_cpu_ptr(tbl->stats, cpu);
                        ndst.ndts_allocs                += st->allocs;
                        ndst.ndts_destroys              += st->destroys;
                        ndst.ndts_hash_grows            += st->hash_grows;
                        ndst.ndts_res_failed            += st->res_failed;
                        ndst.ndts_lookups               += st->lookups;
                        ndst.ndts_hits                  += st->hits;
                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
                        ndst.ndts_table_fulls           += st->table_fulls;
                }

                if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
                                  NDTA_PAD))
                        goto nla_put_failure;
        }

        /* The table's default parms must not be bound to a device. */
        BUG_ON(tbl->parms.dev);
        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
                goto nla_put_failure;

        read_unlock_bh(&tbl->lock);
        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        read_unlock_bh(&tbl->lock);
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1940
1941 static int neightbl_fill_param_info(struct sk_buff *skb,
1942                                     struct neigh_table *tbl,
1943                                     struct neigh_parms *parms,
1944                                     u32 pid, u32 seq, int type,
1945                                     unsigned int flags)
1946 {
1947         struct ndtmsg *ndtmsg;
1948         struct nlmsghdr *nlh;
1949
1950         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1951         if (nlh == NULL)
1952                 return -EMSGSIZE;
1953
1954         ndtmsg = nlmsg_data(nlh);
1955
1956         read_lock_bh(&tbl->lock);
1957         ndtmsg->ndtm_family = tbl->family;
1958         ndtmsg->ndtm_pad1   = 0;
1959         ndtmsg->ndtm_pad2   = 0;
1960
1961         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1962             neightbl_fill_parms(skb, parms) < 0)
1963                 goto errout;
1964
1965         read_unlock_bh(&tbl->lock);
1966         nlmsg_end(skb, nlh);
1967         return 0;
1968 errout:
1969         read_unlock_bh(&tbl->lock);
1970         nlmsg_cancel(skb, nlh);
1971         return -EMSGSIZE;
1972 }
1973
/* Attribute policy for RTM_SETNEIGHTBL top-level attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
        [NDTA_NAME]             = { .type = NLA_STRING },
        [NDTA_THRESH1]          = { .type = NLA_U32 },
        [NDTA_THRESH2]          = { .type = NLA_U32 },
        [NDTA_THRESH3]          = { .type = NLA_U32 },
        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
        [NDTA_PARMS]            = { .type = NLA_NESTED },
};
1982
/* Attribute policy for the NDTPA_* attributes nested in NDTA_PARMS.
 * Times are u64 milliseconds; counters and the ifindex are u32.
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_REPROBES]          = { .type = NLA_U32 },
        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
};
1999
/* RTM_SETNEIGHTBL handler: update table-wide settings (gc thresholds,
 * gc interval) and/or one neigh_parms instance, selected within the
 * table named by the mandatory NDTA_NAME attribute (and, optionally,
 * by family).  Parms are selected by NDTPA_IFINDEX (0 = table default).
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
                        struct netlink_ext_ack *extack)
{
        struct net *net = sock_net(skb->sk);
        struct neigh_table *tbl;
        struct ndtmsg *ndtmsg;
        struct nlattr *tb[NDTA_MAX+1];
        bool found = false;
        int err, tidx;

        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
                          nl_neightbl_policy, extack);
        if (err < 0)
                goto errout;

        if (tb[NDTA_NAME] == NULL) {
                err = -EINVAL;
                goto errout;
        }

        ndtmsg = nlmsg_data(nlh);

        /* Locate the table whose id matches NDTA_NAME (and family, if set). */
        for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
                tbl = neigh_tables[tidx];
                if (!tbl)
                        continue;
                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
                        continue;
                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
                        found = true;
                        break;
                }
        }

        if (!found)
                return -ENOENT;

        /*
         * We acquire tbl->lock to be nice to the periodic timers and
         * make sure they always see a consistent set of values.
         */
        write_lock_bh(&tbl->lock);

        if (tb[NDTA_PARMS]) {
                struct nlattr *tbp[NDTPA_MAX+1];
                struct neigh_parms *p;
                int i, ifindex = 0;

                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
                                       nl_ntbl_parm_policy, extack);
                if (err < 0)
                        goto errout_tbl_lock;

                if (tbp[NDTPA_IFINDEX])
                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

                p = lookup_neigh_parms(tbl, net, ifindex);
                if (p == NULL) {
                        err = -ENOENT;
                        goto errout_tbl_lock;
                }

                /* Apply every NDTPA_* attribute that was supplied. */
                for (i = 1; i <= NDTPA_MAX; i++) {
                        if (tbp[i] == NULL)
                                continue;

                        switch (i) {
                        case NDTPA_QUEUE_LEN:
                                /* deprecated packet count: convert to bytes */
                                NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
                                              nla_get_u32(tbp[i]) *
                                              SKB_TRUESIZE(ETH_FRAME_LEN));
                                break;
                        case NDTPA_QUEUE_LENBYTES:
                                NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_PROXY_QLEN:
                                NEIGH_VAR_SET(p, PROXY_QLEN,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_APP_PROBES:
                                NEIGH_VAR_SET(p, APP_PROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_UCAST_PROBES:
                                NEIGH_VAR_SET(p, UCAST_PROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_MCAST_PROBES:
                                NEIGH_VAR_SET(p, MCAST_PROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_MCAST_REPROBES:
                                NEIGH_VAR_SET(p, MCAST_REPROBES,
                                              nla_get_u32(tbp[i]));
                                break;
                        case NDTPA_BASE_REACHABLE_TIME:
                                NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
                                              nla_get_msecs(tbp[i]));
                                /* update reachable_time as well, otherwise, the change will
                                 * only be effective after the next time neigh_periodic_work
                                 * decides to recompute it (can be multiple minutes)
                                 */
                                p->reachable_time =
                                        neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
                                break;
                        case NDTPA_GC_STALETIME:
                                NEIGH_VAR_SET(p, GC_STALETIME,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_DELAY_PROBE_TIME:
                                NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
                                              nla_get_msecs(tbp[i]));
                                call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
                                break;
                        case NDTPA_RETRANS_TIME:
                                NEIGH_VAR_SET(p, RETRANS_TIME,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_ANYCAST_DELAY:
                                NEIGH_VAR_SET(p, ANYCAST_DELAY,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_PROXY_DELAY:
                                NEIGH_VAR_SET(p, PROXY_DELAY,
                                              nla_get_msecs(tbp[i]));
                                break;
                        case NDTPA_LOCKTIME:
                                NEIGH_VAR_SET(p, LOCKTIME,
                                              nla_get_msecs(tbp[i]));
                                break;
                        }
                }
        }

        /* Table-wide gc settings may only be changed from the initial netns. */
        err = -ENOENT;
        if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
             tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
            !net_eq(net, &init_net))
                goto errout_tbl_lock;

        if (tb[NDTA_THRESH1])
                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

        if (tb[NDTA_THRESH2])
                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

        if (tb[NDTA_THRESH3])
                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

        if (tb[NDTA_GC_INTERVAL])
                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

        err = 0;

errout_tbl_lock:
        write_unlock_bh(&tbl->lock);
errout:
        return err;
}
2160
/* RTM_GETNEIGHTBL dump: for each table matching the requested family,
 * emit the table info followed by one message per additional (device)
 * parms instance.  Dump resume state: cb->args[0] = table index,
 * cb->args[1] = parms index within that table.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int family, tidx, nidx = 0;
        int tbl_skip = cb->args[0];
        int neigh_skip = cb->args[1];
        struct neigh_table *tbl;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
                struct neigh_parms *p;

                tbl = neigh_tables[tidx];
                if (!tbl)
                        continue;

                if (tidx < tbl_skip || (family && tbl->family != family))
                        continue;

                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
                                       NLM_F_MULTI) < 0)
                        break;

                /* Walk the parms list starting after the table default
                 * (which neightbl_fill_info already covered).
                 */
                nidx = 0;
                p = list_next_entry(&tbl->parms, list);
                list_for_each_entry_from(p, &tbl->parms_list, list) {
                        if (!net_eq(neigh_parms_net(p), net))
                                continue;

                        if (nidx < neigh_skip)
                                goto next;

                        if (neightbl_fill_param_info(skb, tbl, p,
                                                     NETLINK_CB(cb->skb).portid,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGHTBL,
                                                     NLM_F_MULTI) < 0)
                                goto out;
                next:
                        nidx++;
                }

                neigh_skip = 0;
        }
out:
        cb->args[0] = tidx;
        cb->args[1] = nidx;

        return skb->len;
}
2213
/* Emit one RTM_NEWNEIGH message for @neigh: ndmsg header, NDA_DST,
 * NDA_LLADDR (only when the entry is NUD_VALID), NDA_PROBES and
 * NDA_CACHEINFO.  State, lladdr and timestamps are snapshotted under
 * neigh->lock so they are mutually consistent.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
                           u32 pid, u32 seq, int type, unsigned int flags)
{
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family  = neigh->ops->family;
        ndm->ndm_pad1    = 0;
        ndm->ndm_pad2    = 0;
        ndm->ndm_flags   = neigh->flags;
        ndm->ndm_type    = neigh->type;
        ndm->ndm_ifindex = neigh->dev->ifindex;

        if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
                goto nla_put_failure;

        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
        if (neigh->nud_state & NUD_VALID) {
                char haddr[MAX_ADDR_LEN];

                /* Copy the hw address under the lock before emitting it. */
                neigh_ha_snapshot(haddr, neigh, neigh->dev);
                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
                        read_unlock_bh(&neigh->lock);
                        goto nla_put_failure;
                }
        }

        /* Ages are reported relative to now, in clock_t units. */
        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
        ci.ndm_refcnt    = refcount_read(&neigh->refcnt) - 1;
        read_unlock_bh(&neigh->lock);

        if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
            nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2266
2267 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2268                             u32 pid, u32 seq, int type, unsigned int flags,
2269                             struct neigh_table *tbl)
2270 {
2271         struct nlmsghdr *nlh;
2272         struct ndmsg *ndm;
2273
2274         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2275         if (nlh == NULL)
2276                 return -EMSGSIZE;
2277
2278         ndm = nlmsg_data(nlh);
2279         ndm->ndm_family  = tbl->family;
2280         ndm->ndm_pad1    = 0;
2281         ndm->ndm_pad2    = 0;
2282         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2283         ndm->ndm_type    = RTN_UNICAST;
2284         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2285         ndm->ndm_state   = NUD_NONE;
2286
2287         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2288                 goto nla_put_failure;
2289
2290         nlmsg_end(skb, nlh);
2291         return 0;
2292
2293 nla_put_failure:
2294         nlmsg_cancel(skb, nlh);
2295         return -EMSGSIZE;
2296 }
2297
/* Propagate a neighbour change: in-kernel netevent notifiers first,
 * then an RTM_NEWNEIGH netlink notification.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2303
2304 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2305 {
2306         struct net_device *master;
2307
2308         if (!master_idx)
2309                 return false;
2310
2311         master = netdev_master_upper_dev_get(dev);
2312         if (!master || master->ifindex != master_idx)
2313                 return true;
2314
2315         return false;
2316 }
2317
2318 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2319 {
2320         if (filter_idx && dev->ifindex != filter_idx)
2321                 return true;
2322
2323         return false;
2324 }
2325
/* Dump all neighbour entries of @tbl into @skb, honouring optional
 * NDA_IFINDEX / NDA_MASTER filters from the request.  The hash table is
 * walked under RCU.  Dump resume state: cb->args[1] = hash bucket,
 * cb->args[2] = index within the bucket.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        const struct nlmsghdr *nlh = cb->nlh;
        struct nlattr *tb[NDA_MAX + 1];
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
        struct neigh_hash_table *nht;
        int filter_master_idx = 0, filter_idx = 0;
        unsigned int flags = NLM_F_MULTI;
        int err;

        /* Parse optional dump filters; a parse failure just means
         * no filtering (err is not propagated).
         */
        err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, NULL);
        if (!err) {
                if (tb[NDA_IFINDEX]) {
                        if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
                                return -EINVAL;
                        filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
                }
                if (tb[NDA_MASTER]) {
                        if (nla_len(tb[NDA_MASTER]) != sizeof(u32))
                                return -EINVAL;
                        filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
                }
                if (filter_idx || filter_master_idx)
                        flags |= NLM_F_DUMP_FILTERED;
        }

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        for (h = s_h; h < (1 << nht->hash_shift); h++) {
                if (h > s_h)
                        s_idx = 0;
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
                        if (idx < s_idx || !net_eq(dev_net(n->dev), net))
                                goto next;
                        if (neigh_ifindex_filtered(n->dev, filter_idx) ||
                            neigh_master_filtered(n->dev, filter_master_idx))
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            flags) < 0) {
                                rc = -1;
                                goto out;
                        }
next:
                        idx++;
                }
        }
        rc = skb->len;
out:
        rcu_read_unlock_bh();
        /* Remember where to resume on the next dump invocation. */
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}
2388
/* Dump all proxy-neighbour entries of @tbl into @skb, under the table
 * read lock.  Dump resume state: cb->args[3] = hash bucket,
 * cb->args[4] = index within the bucket.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                             struct netlink_callback *cb)
{
        struct pneigh_entry *n;
        struct net *net = sock_net(skb->sk);
        int rc, h, s_h = cb->args[3];
        int idx, s_idx = idx = cb->args[4];

        read_lock_bh(&tbl->lock);

        for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
                if (h > s_h)
                        s_idx = 0;
                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
                        if (idx < s_idx || pneigh_net(n) != net)
                                goto next;
                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            NLM_F_MULTI, tbl) < 0) {
                                read_unlock_bh(&tbl->lock);
                                rc = -1;
                                goto out;
                        }
                next:
                        idx++;
                }
        }

        read_unlock_bh(&tbl->lock);
        rc = skb->len;
out:
        /* Remember where to resume on the next dump invocation. */
        cb->args[3] = h;
        cb->args[4] = idx;
        return rc;

}
2426
/* RTM_GETNEIGH dump entry point: iterate all neighbour tables matching
 * the requested family and dump either regular entries or, when the
 * request carries NTF_PROXY, proxy entries.  cb->args[0] holds the
 * table index to resume from; per-table state lives in args[1..4].
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;
        int proxy = 0;
        int err;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        /* check for full ndmsg structure presence, family member is
         * the same for both structures
         */
        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
                proxy = 1;

        s_t = cb->args[0];

        for (t = 0; t < NEIGH_NR_TABLES; t++) {
                tbl = neigh_tables[t];

                if (!tbl)
                        continue;
                if (t < s_t || (family && tbl->family != family))
                        continue;
                /* Moving on to a new table: clear per-table resume state. */
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (proxy)
                        err = pneigh_dump_table(tbl, skb, cb);
                else
                        err = neigh_dump_table(tbl, skb, cb);
                if (err < 0)
                        break;
        }

        cb->args[0] = t;
        return skb->len;
}
2466
2467 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2468 {
2469         int chain;
2470         struct neigh_hash_table *nht;
2471
2472         rcu_read_lock_bh();
2473         nht = rcu_dereference_bh(tbl->nht);
2474
2475         read_lock(&tbl->lock); /* avoid resizes */
2476         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2477                 struct neighbour *n;
2478
2479                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2480                      n != NULL;
2481                      n = rcu_dereference_bh(n->next))
2482                         cb(n, cookie);
2483         }
2484         read_unlock(&tbl->lock);
2485         rcu_read_unlock_bh();
2486 }
2487 EXPORT_SYMBOL(neigh_for_each);
2488
/* The tbl->lock must be held as a writer and BH disabled.
 *
 * Walk every hash chain and call @cb on each neighbour with the entry
 * lock held; entries for which @cb returns non-zero are unlinked from
 * the chain, marked dead and freed via neigh_cleanup_and_release().
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                /* unlink n; np is left in place so the
                                 * successor is examined on the next pass */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        /* release the entry only after dropping n->lock */
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2523
/* Resolve @addr through the neighbour table selected by @index and
 * transmit @skb on @dev; for NEIGH_LINK_TABLE skip resolution and just
 * build the link-layer header before queueing.  The skb is freed on
 * resolution/header failure.  Returns a (negative) errno on failure;
 * -EAFNOSUPPORT when the table for @index is not registered.
 */
int neigh_xmit(int index, struct net_device *dev,
               const void *addr, struct sk_buff *skb)
{
        int err = -EAFNOSUPPORT;
        if (likely(index < NEIGH_NR_TABLES)) {
                struct neigh_table *tbl;
                struct neighbour *neigh;

                tbl = neigh_tables[index];
                if (!tbl)
                        goto out;
                rcu_read_lock_bh();
                neigh = __neigh_lookup_noref(tbl, addr, dev);
                if (!neigh)
                        neigh = __neigh_create(tbl, addr, dev, false);
                /* PTR_ERR value is only meaningful when IS_ERR() holds */
                err = PTR_ERR(neigh);
                if (IS_ERR(neigh)) {
                        rcu_read_unlock_bh();
                        goto out_kfree_skb;
                }
                err = neigh->output(neigh, skb);
                rcu_read_unlock_bh();
        }
        else if (index == NEIGH_LINK_TABLE) {
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      addr, NULL, skb->len);
                if (err < 0)
                        goto out_kfree_skb;
                err = dev_queue_xmit(skb);
        }
out:
        return err;
out_kfree_skb:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2561
2562 #ifdef CONFIG_PROC_FS
2563
2564 static struct neighbour *neigh_get_first(struct seq_file *seq)
2565 {
2566         struct neigh_seq_state *state = seq->private;
2567         struct net *net = seq_file_net(seq);
2568         struct neigh_hash_table *nht = state->nht;
2569         struct neighbour *n = NULL;
2570         int bucket = state->bucket;
2571
2572         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2573         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2574                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2575
2576                 while (n) {
2577                         if (!net_eq(dev_net(n->dev), net))
2578                                 goto next;
2579                         if (state->neigh_sub_iter) {
2580                                 loff_t fakep = 0;
2581                                 void *v;
2582
2583                                 v = state->neigh_sub_iter(state, n, &fakep);
2584                                 if (!v)
2585                                         goto next;
2586                         }
2587                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2588                                 break;
2589                         if (n->nud_state & ~NUD_NOARP)
2590                                 break;
2591 next:
2592                         n = rcu_dereference_bh(n->next);
2593                 }
2594
2595                 if (n)
2596                         break;
2597         }
2598         state->bucket = bucket;
2599
2600         return n;
2601 }
2602
/* Return the next matching neighbour after @n for the /proc walk,
 * moving into later hash buckets when the current chain is exhausted.
 * When @pos is non-NULL it is decremented once if an entry is found
 * (neigh_get_idx uses this to count down to a target position).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        /* a sub-iterator may expand one entry into several rows; stay on
         * @n while it still yields something */
        if (state->neigh_sub_iter) {
                void *v = state->neigh_sub_iter(state, n, pos);
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
2650
2651 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2652 {
2653         struct neighbour *n = neigh_get_first(seq);
2654
2655         if (n) {
2656                 --(*pos);
2657                 while (*pos) {
2658                         n = neigh_get_next(seq, n, pos);
2659                         if (!n)
2660                                 break;
2661                 }
2662         }
2663         return *pos ? NULL : n;
2664 }
2665
2666 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2667 {
2668         struct neigh_seq_state *state = seq->private;
2669         struct net *net = seq_file_net(seq);
2670         struct neigh_table *tbl = state->tbl;
2671         struct pneigh_entry *pn = NULL;
2672         int bucket = state->bucket;
2673
2674         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2675         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2676                 pn = tbl->phash_buckets[bucket];
2677                 while (pn && !net_eq(pneigh_net(pn), net))
2678                         pn = pn->next;
2679                 if (pn)
2680                         break;
2681         }
2682         state->bucket = bucket;
2683
2684         return pn;
2685 }
2686
2687 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2688                                             struct pneigh_entry *pn,
2689                                             loff_t *pos)
2690 {
2691         struct neigh_seq_state *state = seq->private;
2692         struct net *net = seq_file_net(seq);
2693         struct neigh_table *tbl = state->tbl;
2694
2695         do {
2696                 pn = pn->next;
2697         } while (pn && !net_eq(pneigh_net(pn), net));
2698
2699         while (!pn) {
2700                 if (++state->bucket > PNEIGH_HASHMASK)
2701                         break;
2702                 pn = tbl->phash_buckets[state->bucket];
2703                 while (pn && !net_eq(pneigh_net(pn), net))
2704                         pn = pn->next;
2705                 if (pn)
2706                         break;
2707         }
2708
2709         if (pn && pos)
2710                 --(*pos);
2711
2712         return pn;
2713 }
2714
2715 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2716 {
2717         struct pneigh_entry *pn = pneigh_get_first(seq);
2718
2719         if (pn) {
2720                 --(*pos);
2721                 while (*pos) {
2722                         pn = pneigh_get_next(seq, pn, pos);
2723                         if (!pn)
2724                                 break;
2725                 }
2726         }
2727         return *pos ? NULL : pn;
2728 }
2729
2730 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2731 {
2732         struct neigh_seq_state *state = seq->private;
2733         void *rc;
2734         loff_t idxpos = *pos;
2735
2736         rc = neigh_get_idx(seq, &idxpos);
2737         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2738                 rc = pneigh_get_idx(seq, &idxpos);
2739
2740         return rc;
2741 }
2742
/* Begin a /proc neighbour-table walk over @tbl.  Takes
 * rcu_read_lock_bh(); the matching unlock happens in neigh_seq_stop().
 * Returns SEQ_START_TOKEN at position 0, otherwise the entry at *pos.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2758
/* seq_file .next: after the start token return the first neighbour;
 * while walking neighbours, fall through to the proxy list once they
 * are exhausted (unless NEIGH_SEQ_NEIGH_ONLY); otherwise continue the
 * proxy walk.  Always advances *pos.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct neigh_seq_state *state;
        void *rc;

        if (v == SEQ_START_TOKEN) {
                rc = neigh_get_first(seq);
                goto out;
        }

        state = seq->private;
        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
                rc = neigh_get_next(seq, v, NULL);
                if (rc)
                        goto out;
                /* neighbours done: switch to the proxy entries */
                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
                        rc = pneigh_get_first(seq);
        } else {
                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
                rc = pneigh_get_next(seq, v, NULL);
        }
out:
        ++(*pos);
        return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2785
/* End the walk started by neigh_seq_start(): drop the BH-disabled RCU
 * read lock taken there.
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(rcu_bh)
{
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2792
2793 /* statistics via seq_file */
2794
2795 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2796 {
2797         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2798         int cpu;
2799
2800         if (*pos == 0)
2801                 return SEQ_START_TOKEN;
2802
2803         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2804                 if (!cpu_possible(cpu))
2805                         continue;
2806                 *pos = cpu+1;
2807                 return per_cpu_ptr(tbl->stats, cpu);
2808         }
2809         return NULL;
2810 }
2811
2812 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2813 {
2814         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2815         int cpu;
2816
2817         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2818                 if (!cpu_possible(cpu))
2819                         continue;
2820                 *pos = cpu+1;
2821                 return per_cpu_ptr(tbl->stats, cpu);
2822         }
2823         return NULL;
2824 }
2825
/* seq_file .stop: nothing to release — the stat walk takes no locks. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2830
/* Print one row of /proc/net/stat/<table>: the column header for the
 * start token, otherwise the counters of one CPU's neigh_statistics.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
        struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
        struct neigh_statistics *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }

        /* "entries" is table-global; the remaining fields are per-cpu */
        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
                        "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
                   atomic_read(&tbl->entries),

                   st->allocs,
                   st->destroys,
                   st->hash_grows,

                   st->lookups,
                   st->hits,

                   st->res_failed,

                   st->rcv_probes_mcast,
                   st->rcv_probes_ucast,

                   st->periodic_gc_runs,
                   st->forced_gc_runs,
                   st->unres_discards,
                   st->table_fulls
                   );

        return 0;
}
2865
/* seq_operations backing the per-table /proc/net/stat file */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2872 #endif /* CONFIG_PROC_FS */
2873
/* Upper bound on the netlink message size needed by neigh_fill_info():
 * the ndmsg header plus the NDA_DST, NDA_LLADDR, NDA_CACHEINFO and
 * NDA_PROBES attributes.
 */
static inline size_t neigh_nlmsg_size(void)
{
        return NLMSG_ALIGN(sizeof(struct ndmsg))
               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
               + nla_total_size(sizeof(struct nda_cacheinfo))
               + nla_total_size(4); /* NDA_PROBES */
}
2882
/* Build a netlink neighbour message of @type for @n and multicast it to
 * RTNLGRP_NEIGH listeners.  On failure the error is recorded against
 * the group via rtnl_set_sk_err().
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
                           u32 pid)
{
        struct net *net = dev_net(n->dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
        if (skb == NULL)
                goto errout;

        err = neigh_fill_info(skb, n, pid, 0, type, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2907
/* Ask a userspace resolver to handle @n by multicasting an
 * RTM_GETNEIGH request to RTNLGRP_NEIGH.
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
2913
2914 #ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the sysctl handlers below. */
static int zero;
static int int_max = INT_MAX;
/* Cap unres_qlen so qlen * SKB_TRUESIZE(ETH_FRAME_LEN) fits in an int. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2918
/* Handler for the legacy "unres_qlen" sysctl: the value is stored in
 * bytes (unres_qlen_bytes) but exposed to userspace as a packet count.
 * Reads truncate down; writes are re-scaled, so a read-back value may
 * differ slightly from what was written.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int size, ret;
        struct ctl_table tmp = *ctl;

        tmp.extra1 = &zero;
        tmp.extra2 = &unres_qlen_max;
        tmp.data = &size;

        /* convert the stored byte count to packets for userspace */
        size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

        if (write && !ret)
                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
        return ret;
}
2936
2937 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2938                                                    int family)
2939 {
2940         switch (family) {
2941         case AF_INET:
2942                 return __in_dev_arp_parms_get_rcu(dev);
2943         case AF_INET6:
2944                 return __in6_dev_nd_parms_get_rcu(dev);
2945         }
2946         return NULL;
2947 }
2948
/* Propagate the default value p->data[index] to every device in @net
 * whose own parms have not been explicitly set for that index (as
 * tracked by the data_state bitmap).
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
                                  int index)
{
        struct net_device *dev;
        int family = neigh_parms_family(p);

        rcu_read_lock();
        for_each_netdev_rcu(net, dev) {
                struct neigh_parms *dst_p =
                                neigh_get_dev_parms_rcu(dev, family);

                /* only overwrite values the admin never set per-device */
                if (dst_p && !test_bit(index, dst_p->data_state))
                        dst_p->data[index] = p->data[index];
        }
        rcu_read_unlock();
}
2965
/* Common post-write bookkeeping for the neigh sysctl handlers: mark the
 * variable as explicitly set, notify delay_first_probe_time consumers,
 * and — for the "default" directory (ctl->extra1 == NULL) — propagate
 * the new value to all devices.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
        struct net_device *dev = ctl->extra1;
        struct neigh_parms *p = ctl->extra2;
        struct net *net = neigh_parms_net(p);
        /* recover the variable index from the data pointer offset */
        int index = (int *) ctl->data - p->data;

        if (!write)
                return;

        set_bit(index, p->data_state);
        if (index == NEIGH_VAR_DELAY_PROBE_TIME)
                call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
        if (!dev) /* NULL dev means this is default value */
                neigh_copy_dflt_parms(net, p, index);
}
2982
2983 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2984                                            void __user *buffer,
2985                                            size_t *lenp, loff_t *ppos)
2986 {
2987         struct ctl_table tmp = *ctl;
2988         int ret;
2989
2990         tmp.extra1 = &zero;
2991         tmp.extra2 = &int_max;
2992
2993         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2994         neigh_proc_update(ctl, write);
2995         return ret;
2996 }
2997
2998 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2999                         void __user *buffer, size_t *lenp, loff_t *ppos)
3000 {
3001         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3002
3003         neigh_proc_update(ctl, write);
3004         return ret;
3005 }
3006 EXPORT_SYMBOL(neigh_proc_dointvec);
3007
3008 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3009                                 void __user *buffer,
3010                                 size_t *lenp, loff_t *ppos)
3011 {
3012         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3013
3014         neigh_proc_update(ctl, write);
3015         return ret;
3016 }
3017 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3018
3019 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3020                                               void __user *buffer,
3021                                               size_t *lenp, loff_t *ppos)
3022 {
3023         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3024
3025         neigh_proc_update(ctl, write);
3026         return ret;
3027 }
3028
3029 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3030                                    void __user *buffer,
3031                                    size_t *lenp, loff_t *ppos)
3032 {
3033         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3034
3035         neigh_proc_update(ctl, write);
3036         return ret;
3037 }
3038 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3039
3040 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3041                                           void __user *buffer,
3042                                           size_t *lenp, loff_t *ppos)
3043 {
3044         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3045
3046         neigh_proc_update(ctl, write);
3047         return ret;
3048 }
3049
/* Handler for base_reachable_time(_ms): on a successful write also
 * refresh p->reachable_time immediately instead of waiting for
 * neigh_periodic_work to recompute it.  Dispatches on the entry name
 * because both units share this handler.
 */
static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
                                          void __user *buffer,
                                          size_t *lenp, loff_t *ppos)
{
        struct neigh_parms *p = ctl->extra2;
        int ret;

        if (strcmp(ctl->procname, "base_reachable_time") == 0)
                ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
        else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
                ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
        else
                ret = -1;

        if (write && ret == 0) {
                /* update reachable_time as well, otherwise, the change will
                 * only be effective after the next time neigh_periodic_work
                 * decides to recompute it
                 */
                p->reachable_time =
                        neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }
        return ret;
}
3074
/* Offset-of idiom: the template below stores each entry's data pointer
 * as an offset into struct neigh_parms::data; neigh_sysctl_register()
 * turns it into a real pointer by adding the parms address.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)  \
        (&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table slot for a neigh_parms variable.  @data_attr may
 * differ from @attr when a legacy name aliases another variable's data.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
        [NEIGH_VAR_ ## attr] = { \
                .procname       = name, \
                .data           = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
        }

/* Convenience wrappers: entry name == data name, mode 0644. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* "Reused" variants: a legacy entry name backed by another variable. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
        NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3104
/* Template ctl_table for one neigh sysctl directory.  It is copied by
 * neigh_sysctl_register() and patched with real data pointers; the
 * NEIGH_VAR_GC_* trailing entries apply only to the "default"
 * directory and are truncated away for per-device registrations.
 */
static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
                NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
                NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
                NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
                /* legacy aliases backed by the entries above */
                NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
                NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
                NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
                /* table-wide GC knobs; only present in the default dir */
                [NEIGH_VAR_GC_INTERVAL] = {
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_THRESH1] = {
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH2] = {
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                [NEIGH_VAR_GC_THRESH3] = {
                        .procname       = "gc_thresh3",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .extra1         = &zero,
                        .extra2         = &int_max,
                        .proc_handler   = proc_dointvec_minmax,
                },
                {},
        },
};
3159
/* Create the "net/<ipv4|ipv6>/neigh/<dev|default>" sysctl directory
 * for @p.  @dev is NULL for the per-family "default" directory, which
 * additionally exposes the table-wide gc_* knobs.  A non-NULL @handler
 * overrides the proc handlers of the four time-related entries.
 * Returns 0 on success, -ENOBUFS on allocation/registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          proc_handler *handler)
{
        int i;
        struct neigh_sysctl_table *t;
        const char *dev_name_source;
        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
        char *p_name;

        /* copy the template so each registration patches its own table */
        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto err;

        /* rebase the template's data offsets onto this parms instance */
        for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
                t->neigh_vars[i].data += (long) p;
                t->neigh_vars[i].extra1 = dev;
                t->neigh_vars[i].extra2 = p;
        }

        if (dev) {
                dev_name_source = dev->name;
                /* Terminate the table early */
                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
        } else {
                struct neigh_table *tbl = p->tbl;
                dev_name_source = "default";
                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
        }

        if (handler) {
                /* RetransTime */
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
                /* RetransTime (in milliseconds)*/
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
        } else {
                /* Those handlers will update p->reachable_time after
                 * base_reachable_time(_ms) is set to ensure the new timer starts being
                 * applied after the next neighbour update instead of waiting for
                 * neigh_periodic_work to update its value (can be multiple minutes)
                 * So any handler that replaces them should do this as well
                 */
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
                        neigh_proc_base_reachable_time;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
                        neigh_proc_base_reachable_time;
        }

        /* Don't export sysctls to unprivileged users */
        if (neigh_parms_net(p)->user_ns != &init_user_ns)
                t->neigh_vars[0].procname = NULL;

        switch (neigh_parms_family(p)) {
        case AF_INET:
              p_name = "ipv4";
              break;
        case AF_INET6:
              p_name = "ipv6";
              break;
        default:
              BUG();
        }

        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
                p_name, dev_name_source);
        t->sysctl_header =
                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
        if (!t->sysctl_header)
                goto free;

        p->sysctl_table = t;
        return 0;

free:
        kfree(t);
err:
        return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3248
3249 void neigh_sysctl_unregister(struct neigh_parms *p)
3250 {
3251         if (p->sysctl_table) {
3252                 struct neigh_sysctl_table *t = p->sysctl_table;
3253                 p->sysctl_table = NULL;
3254                 unregister_net_sysctl_table(t->sysctl_header);
3255                 kfree(t);
3256         }
3257 }
3258 EXPORT_SYMBOL(neigh_sysctl_unregister);
3259
3260 #endif  /* CONFIG_SYSCTL */
3261
/* Register the PF_UNSPEC neighbour rtnetlink handlers at boot. */
static int __init neigh_init(void)
{
        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);

        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
                      0);
        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

        return 0;
}

subsys_initcall(neigh_init);
3276