/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
#define NEIGH_DEBUG 1

#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF
static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
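/* Illustrative sketch (not part of the original file): the canonical way to
 * do non-trivial work on an entry found during a bucket scan is to take a
 * reference under tbl->lock, drop the lock, and only then act on the entry.
 * The helper itself is hypothetical; neigh_hold()/neigh_release() are real.
 */
static void neigh_example_scan_action(struct neigh_table *tbl,
				      struct neighbour *n)
{
	write_lock_bh(&tbl->lock);
	neigh_hold(n);			/* pin the entry */
	write_unlock_bh(&tbl->lock);	/* never call out with tbl->lock held */

	/* callbacks to backends / transmissions may safely happen here */

	neigh_release(n);		/* drop our reference again */
}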
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}
/*
 * It is a uniform random distribution over the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
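/* Worked example (illustrative, not part of the original file): with the
 * IPv6 default base of 30 seconds, i.e. base = 30 * HZ, the expression
 * above returns a value uniformly distributed in [15 * HZ, 45 * HZ), so a
 * freshly confirmed entry stays REACHABLE for 15 to 45 seconds.
 */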
static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}
static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}
static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}
static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}
bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    time_after(tref, n->updated))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}
static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
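/* Illustrative sketch (not part of the original file): a successful
 * neigh_lookup() returns the entry with its reference count already taken,
 * so every hit must be paired with neigh_release().
 */
static void neigh_example_lookup(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev)
{
	struct neighbour *n = neigh_lookup(tbl, pkey, dev);

	if (n) {
		neigh_dbg(2, "neigh %p has state %x\n", n, n->nud_state);
		neigh_release(n);	/* drop the reference from the lookup */
	}
}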
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!refcount_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
static struct neighbour *___neigh_create(struct neigh_table *tbl,
					 const void *pkey,
					 struct net_device *dev,
					 bool exempt_from_gc, bool want_ref)
{
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
	u32 hash_val;
	unsigned int key_len = tbl->key_len;
	int error;
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
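/* Note (illustrative, not part of the original file): the xor folds by 16,
 * 8 and 4 above mix every byte of the last 32-bit word of the key into the
 * low nibble before masking, so all 16 proxy buckets (PNEIGH_HASHMASK is
 * 0xF) are reachable regardless of key length or byte order.
 */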
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				*np = n->next;
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. error_report() via
	   report_unreachable is a very complicated routine. In particular,
	   it can hit the same neighbour entry!

	   So, we try to be careful and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
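/* Illustrative note (not part of the original file): passing a NULL skb to
 * the neigh_event_send() wrapper just kicks the state machine without
 * queueing anything, which is exactly what the NTF_USE handling in
 * neigh_add() below does to start resolution on demand:
 *
 *	neigh_event_send(neigh, NULL);
 */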
static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr or NULL, if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;
	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		goto out;
	}

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
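/* Illustrative sketch (not part of the original file): an administrative
 * update such as "ip neigh replace ... lladdr ..." ultimately boils down
 * to a call like this; "n" and "mac" are hypothetical caller values.
 */
static int neigh_example_admin_replace(struct neighbour *n, const u8 *mac)
{
	return neigh_update(n, mac, NUD_PERMANENT,
			    NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN, 0);
}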
/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
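/* Illustrative note (not part of the original file): the do/while above is
 * the reader side of the neigh->ha_lock seqlock. The writer side in
 * __neigh_update() wraps the address copy the same way, so a torn read of
 * neigh->ha merely causes the header to be rebuilt:
 *
 *	write_seqlock(&neigh->ha_lock);
 *	memcpy(&neigh->ha, lladdr, dev->addr_len);
 *	write_sequnlock(&neigh->ha_lock);
 */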
/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;

	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);
static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
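/* Illustrative note (not part of the original file): a protocol embeds a
 * struct neigh_table with at least id, family, key_len, hash and
 * constructor filled in, then registers it once at init time, the way
 * arp_init() and ndisc_init() do:
 *
 *	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
 */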
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
	case AF_DECnet:
		tbl = neigh_tables[NEIGH_DN_TABLE];
		break;
	}

	return tbl;
}
const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
};
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;

	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;

	if (ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	} else
		err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
				     NETLINK_CB(skb).portid, extack);

	if (protocol)
		neigh->protocol = protocol;

	neigh_release(neigh);

out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
			ndst.ndts_table_fulls		+= st->table_fulls;
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH, flags, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int err, i;

	if (strict_check) {
		struct ndmsg *ndm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
					 nda_policy, extack);
	} else {
		err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
				  nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
	return skb->len;
}

static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
				 nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -ERANGE;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

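/* Back-of-the-envelope check (assuming the usual 12-byte struct ndmsg and
 * MAX_ADDR_LEN of 32): 12 for the ndmsg payload, 36 each for the 32-byte
 * NDA_DST and NDA_LLADDR attributes once the 4-byte nlattr header and
 * alignment are counted, 20 for NDA_CACHEINFO, 8 for NDA_PROBES and 8 for
 * the single-byte NDA_PROTOCOL -- roughly 120 bytes per neighbour message,
 * before the netlink header that nlmsg_new() accounts for separately.
 */
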
static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

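/* Example (hedged): this handler is what answers "ip neigh get 192.0.2.1
 * dev eth0" -- an RTM_GETNEIGH request carrying NDA_DST and ndm_ifindex;
 * setting NTF_PROXY in ndm_flags switches the lookup to the proxy table,
 * as with "ip neigh get proxy 192.0.2.1 dev eth0".
 */
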
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

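/* A minimal usage sketch (not built; the callback and counter are
 * hypothetical): walking a table read-only with a cookie.
 */
#if 0
static void count_one(struct neighbour *n, void *cookie)
{
	(*(int *)cookie)++;	/* runs under tbl->lock, so keep it trivial */
}

static int count_entries(struct neigh_table *tbl)
{
	int count = 0;

	neigh_for_each(tbl, count_one, &count);
	return count;
}
#endif
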
/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

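/* A minimal sketch (not built; both functions are hypothetical): a callback
 * that always returns non-zero unlinks and releases every entry, as a
 * protocol teardown path might want. The caller must hold tbl->lock for
 * writing with BHs disabled, per the comment above.
 */
#if 0
static int release_every_entry(struct neighbour *n)
{
	return 1;	/* non-zero: unlink this entry and release it */
}

static void flush_table(struct neigh_table *tbl)
{
	write_lock_bh(&tbl->lock);
	__neigh_for_each_release(tbl, release_every_entry);
	write_unlock_bh(&tbl->lock);
}
#endif
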
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;
	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		neigh = __neigh_lookup_noref(tbl, addr, dev);
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

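/* A minimal usage sketch (not built; the wrapper is hypothetical and device
 * and packet setup are assumed to happen elsewhere): an L3 sender can hand
 * a prepared skb to the ARP table and let the neighbour layer resolve the
 * next hop or queue the packet behind the resolution.
 */
#if 0
static int send_via_arp(struct net_device *dev, __be32 *next_hop,
			struct sk_buff *skb)
{
	/* On failure the skb has already been consumed by neigh_xmit(). */
	return neigh_xmit(NEIGH_ARP_TABLE, dev, next_hop, skb);
}
#endif
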
#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

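/* Example (hedged): protocol seq_file implementations layer on these three
 * helpers; ARP's /proc/net/arp start routine, for instance, amounts to
 * calling neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP) and
 * then letting neigh_seq_next()/neigh_seq_stop() drive the walk.
 */
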
/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};

#endif /* CONFIG_PROC_FS */

static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int zero;
static int int_max = INT_MAX;
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

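/* Worked example (approximate): the legacy unres_qlen knob is presented as
 * a packet count by converting through SKB_TRUESIZE(ETH_FRAME_LEN), i.e. a
 * full Ethernet frame plus skb overhead -- on the order of 2KB on typical
 * 64-bit builds. With a common unres_qlen_bytes default of 212992, reading
 * unres_qlen therefore yields roughly a hundred packets, and writing N to
 * unres_qlen stores about N * 2KB back into unres_qlen_bytes.
 */
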
static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void __user *buffer,
					   size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = &zero;
	tmp.extra2 = &int_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
				void __user *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void __user *buffer,
					      size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void __user *buffer,
				   size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void __user *buffer,
					  size_t *lenp, loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

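/* Worked example: writing 30000 to base_reachable_time_ms (30s) makes the
 * handler above immediately draw a fresh reachable_time from
 * neigh_rand_reach_time(), i.e. uniformly from the 15s..45s interval
 * (base/2 to 3*base/2), instead of waiting for neigh_periodic_work.
 */
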
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

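/* Note on the offset trick above: NEIGH_PARMS_DATA_OFFSET() stores the
 * would-be address of data[index] inside a neigh_parms located at NULL --
 * i.e. a plain offset -- as each ctl_table's .data pointer. It only becomes
 * a real pointer when neigh_sysctl_register() below adds (long)p to it,
 * which lets one static template serve every neigh_parms instance.
 */
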
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* Those handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces them should do this as well
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

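/* A minimal usage sketch (not built; the wrapper is hypothetical): a
 * protocol registers its per-device parms with a NULL handler to accept
 * the stock handlers, including neigh_proc_base_reachable_time for the
 * base_reachable_time knobs installed above.
 */
#if 0
static int example_sysctl_hookup(struct net_device *dev,
				 struct neigh_parms *parms)
{
	return neigh_sysctl_register(dev, parms, NULL);
}
#endif
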
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif /* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);