Merge tag 'driver-core-5.0-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/kmemleak.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/module.h>
25 #include <linux/socket.h>
26 #include <linux/netdevice.h>
27 #include <linux/proc_fs.h>
28 #ifdef CONFIG_SYSCTL
29 #include <linux/sysctl.h>
30 #endif
31 #include <linux/times.h>
32 #include <net/net_namespace.h>
33 #include <net/neighbour.h>
34 #include <net/dst.h>
35 #include <net/sock.h>
36 #include <net/netevent.h>
37 #include <net/netlink.h>
38 #include <linux/rtnetlink.h>
39 #include <linux/random.h>
40 #include <linux/string.h>
41 #include <linux/log2.h>
42 #include <linux/inetdevice.h>
43 #include <net/addrconf.h>
44
#define DEBUG
#define NEIGH_DEBUG 1
/*
 * Emit a pr_debug() message when @level does not exceed NEIGH_DEBUG.
 * @level is parenthesised so that an expression argument is compared
 * as a whole rather than being split by operator precedence.
 */
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
        if ((level) <= NEIGH_DEBUG)             \
                pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)
52
53 #define PNEIGH_HASHMASK         0xF
54
55 static void neigh_timer_handler(struct timer_list *t);
56 static void __neigh_notify(struct neighbour *n, int type, int flags,
57                            u32 pid);
58 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
59 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
60                                     struct net_device *dev);
61
62 #ifdef CONFIG_PROC_FS
63 static const struct seq_operations neigh_stat_seq_ops;
64 #endif
65
66 /*
67    Neighbour hash table buckets are protected with rwlock tbl->lock.
68
69    - All the scans/updates to hash buckets MUST be made under this lock.
70    - NOTHING clever should be made under this lock: no callbacks
71      to protocol backends, no attempts to send something to network.
72      It will result in deadlocks, if backend/driver wants to use neighbour
73      cache.
74    - If the entry requires some non-trivial actions, increase
75      its reference count and release table lock.
76
77    Neighbour entries are protected:
78    - with reference count.
79    - with rwlock neigh->lock
80
81    Reference count prevents destruction.
82
83    neigh->lock mainly serializes ll address data and its validity state.
84    However, the same lock is also used to protect other entry fields:
85     - timer
86     - resolution queue
87
88    Again, nothing clever shall be done under neigh->lock;
89    the most complicated procedure we allow is dev->hard_header.
90    It is assumed that dev->hard_header is simplistic and does
91    not make callbacks to neighbour tables.
92  */
93
94 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
95 {
96         kfree_skb(skb);
97         return -ENETDOWN;
98 }
99
100 static void neigh_cleanup_and_release(struct neighbour *neigh)
101 {
102         if (neigh->parms->neigh_cleanup)
103                 neigh->parms->neigh_cleanup(neigh);
104
105         __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
106         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
107         neigh_release(neigh);
108 }
109
/*
 * Pick a randomised reachable time in the interval
 * (1/2)*base ... (3/2)*base.  This corresponds to the default IPv6
 * settings and is intentionally not overridable.  A @base of zero
 * yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (prandom_u32() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122 static void neigh_mark_dead(struct neighbour *n)
123 {
124         n->dead = 1;
125         if (!list_empty(&n->gc_list)) {
126                 list_del_init(&n->gc_list);
127                 atomic_dec(&n->tbl->gc_entries);
128         }
129 }
130
131 static void neigh_update_gc_list(struct neighbour *n)
132 {
133         bool on_gc_list, exempt_from_gc;
134
135         write_lock_bh(&n->tbl->lock);
136         write_lock(&n->lock);
137
138         /* remove from the gc list if new state is permanent or if neighbor
139          * is externally learned; otherwise entry should be on the gc list
140          */
141         exempt_from_gc = n->nud_state & NUD_PERMANENT ||
142                          n->flags & NTF_EXT_LEARNED;
143         on_gc_list = !list_empty(&n->gc_list);
144
145         if (exempt_from_gc && on_gc_list) {
146                 list_del_init(&n->gc_list);
147                 atomic_dec(&n->tbl->gc_entries);
148         } else if (!exempt_from_gc && !on_gc_list) {
149                 /* add entries to the tail; cleaning removes from the front */
150                 list_add_tail(&n->gc_list, &n->tbl->gc_list);
151                 atomic_inc(&n->tbl->gc_entries);
152         }
153
154         write_unlock(&n->lock);
155         write_unlock_bh(&n->tbl->lock);
156 }
157
158 static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
159                                      int *notify)
160 {
161         bool rc = false;
162         u8 ndm_flags;
163
164         if (!(flags & NEIGH_UPDATE_F_ADMIN))
165                 return rc;
166
167         ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
168         if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
169                 if (ndm_flags & NTF_EXT_LEARNED)
170                         neigh->flags |= NTF_EXT_LEARNED;
171                 else
172                         neigh->flags &= ~NTF_EXT_LEARNED;
173                 rc = true;
174                 *notify = 1;
175         }
176
177         return rc;
178 }
179
180 static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
181                       struct neigh_table *tbl)
182 {
183         bool retval = false;
184
185         write_lock(&n->lock);
186         if (refcount_read(&n->refcnt) == 1) {
187                 struct neighbour *neigh;
188
189                 neigh = rcu_dereference_protected(n->next,
190                                                   lockdep_is_held(&tbl->lock));
191                 rcu_assign_pointer(*np, neigh);
192                 neigh_mark_dead(n);
193                 retval = true;
194         }
195         write_unlock(&n->lock);
196         if (retval)
197                 neigh_cleanup_and_release(n);
198         return retval;
199 }
200
201 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
202 {
203         struct neigh_hash_table *nht;
204         void *pkey = ndel->primary_key;
205         u32 hash_val;
206         struct neighbour *n;
207         struct neighbour __rcu **np;
208
209         nht = rcu_dereference_protected(tbl->nht,
210                                         lockdep_is_held(&tbl->lock));
211         hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
212         hash_val = hash_val >> (32 - nht->hash_shift);
213
214         np = &nht->hash_buckets[hash_val];
215         while ((n = rcu_dereference_protected(*np,
216                                               lockdep_is_held(&tbl->lock)))) {
217                 if (n == ndel)
218                         return neigh_del(n, np, tbl);
219                 np = &n->next;
220         }
221         return false;
222 }
223
224 static int neigh_forced_gc(struct neigh_table *tbl)
225 {
226         int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
227         unsigned long tref = jiffies - 5 * HZ;
228         struct neighbour *n, *tmp;
229         int shrunk = 0;
230
231         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
232
233         write_lock_bh(&tbl->lock);
234
235         list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
236                 if (refcount_read(&n->refcnt) == 1) {
237                         bool remove = false;
238
239                         write_lock(&n->lock);
240                         if ((n->nud_state == NUD_FAILED) ||
241                             time_after(tref, n->updated))
242                                 remove = true;
243                         write_unlock(&n->lock);
244
245                         if (remove && neigh_remove_one(n, tbl))
246                                 shrunk++;
247                         if (shrunk >= max_clean)
248                                 break;
249                 }
250         }
251
252         tbl->last_flush = jiffies;
253
254         write_unlock_bh(&tbl->lock);
255
256         return shrunk;
257 }
258
259 static void neigh_add_timer(struct neighbour *n, unsigned long when)
260 {
261         neigh_hold(n);
262         if (unlikely(mod_timer(&n->timer, when))) {
263                 printk("NEIGH: BUG, double timer add, state is %x\n",
264                        n->nud_state);
265                 dump_stack();
266         }
267 }
268
269 static int neigh_del_timer(struct neighbour *n)
270 {
271         if ((n->nud_state & NUD_IN_TIMER) &&
272             del_timer(&n->timer)) {
273                 neigh_release(n);
274                 return 1;
275         }
276         return 0;
277 }
278
279 static void pneigh_queue_purge(struct sk_buff_head *list)
280 {
281         struct sk_buff *skb;
282
283         while ((skb = skb_dequeue(list)) != NULL) {
284                 dev_put(skb->dev);
285                 kfree_skb(skb);
286         }
287 }
288
289 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
290                             bool skip_perm)
291 {
292         int i;
293         struct neigh_hash_table *nht;
294
295         nht = rcu_dereference_protected(tbl->nht,
296                                         lockdep_is_held(&tbl->lock));
297
298         for (i = 0; i < (1 << nht->hash_shift); i++) {
299                 struct neighbour *n;
300                 struct neighbour __rcu **np = &nht->hash_buckets[i];
301
302                 while ((n = rcu_dereference_protected(*np,
303                                         lockdep_is_held(&tbl->lock))) != NULL) {
304                         if (dev && n->dev != dev) {
305                                 np = &n->next;
306                                 continue;
307                         }
308                         if (skip_perm && n->nud_state & NUD_PERMANENT) {
309                                 np = &n->next;
310                                 continue;
311                         }
312                         rcu_assign_pointer(*np,
313                                    rcu_dereference_protected(n->next,
314                                                 lockdep_is_held(&tbl->lock)));
315                         write_lock(&n->lock);
316                         neigh_del_timer(n);
317                         neigh_mark_dead(n);
318                         if (refcount_read(&n->refcnt) != 1) {
319                                 /* The most unpleasant situation.
320                                    We must destroy neighbour entry,
321                                    but someone still uses it.
322
323                                    The destroy will be delayed until
324                                    the last user releases us, but
325                                    we must kill timers etc. and move
326                                    it to safe state.
327                                  */
328                                 __skb_queue_purge(&n->arp_queue);
329                                 n->arp_queue_len_bytes = 0;
330                                 n->output = neigh_blackhole;
331                                 if (n->nud_state & NUD_VALID)
332                                         n->nud_state = NUD_NOARP;
333                                 else
334                                         n->nud_state = NUD_NONE;
335                                 neigh_dbg(2, "neigh %p is stray\n", n);
336                         }
337                         write_unlock(&n->lock);
338                         neigh_cleanup_and_release(n);
339                 }
340         }
341 }
342
343 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
344 {
345         write_lock_bh(&tbl->lock);
346         neigh_flush_dev(tbl, dev, false);
347         write_unlock_bh(&tbl->lock);
348 }
349 EXPORT_SYMBOL(neigh_changeaddr);
350
351 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
352                           bool skip_perm)
353 {
354         write_lock_bh(&tbl->lock);
355         neigh_flush_dev(tbl, dev, skip_perm);
356         pneigh_ifdown_and_unlock(tbl, dev);
357
358         del_timer_sync(&tbl->proxy_timer);
359         pneigh_queue_purge(&tbl->proxy_queue);
360         return 0;
361 }
362
363 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
364 {
365         __neigh_ifdown(tbl, dev, true);
366         return 0;
367 }
368 EXPORT_SYMBOL(neigh_carrier_down);
369
370 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
371 {
372         __neigh_ifdown(tbl, dev, false);
373         return 0;
374 }
375 EXPORT_SYMBOL(neigh_ifdown);
376
377 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
378                                      struct net_device *dev,
379                                      bool exempt_from_gc)
380 {
381         struct neighbour *n = NULL;
382         unsigned long now = jiffies;
383         int entries;
384
385         if (exempt_from_gc)
386                 goto do_alloc;
387
388         entries = atomic_inc_return(&tbl->gc_entries) - 1;
389         if (entries >= tbl->gc_thresh3 ||
390             (entries >= tbl->gc_thresh2 &&
391              time_after(now, tbl->last_flush + 5 * HZ))) {
392                 if (!neigh_forced_gc(tbl) &&
393                     entries >= tbl->gc_thresh3) {
394                         net_info_ratelimited("%s: neighbor table overflow!\n",
395                                              tbl->id);
396                         NEIGH_CACHE_STAT_INC(tbl, table_fulls);
397                         goto out_entries;
398                 }
399         }
400
401 do_alloc:
402         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
403         if (!n)
404                 goto out_entries;
405
406         __skb_queue_head_init(&n->arp_queue);
407         rwlock_init(&n->lock);
408         seqlock_init(&n->ha_lock);
409         n->updated        = n->used = now;
410         n->nud_state      = NUD_NONE;
411         n->output         = neigh_blackhole;
412         seqlock_init(&n->hh.hh_lock);
413         n->parms          = neigh_parms_clone(&tbl->parms);
414         timer_setup(&n->timer, neigh_timer_handler, 0);
415
416         NEIGH_CACHE_STAT_INC(tbl, allocs);
417         n->tbl            = tbl;
418         refcount_set(&n->refcnt, 1);
419         n->dead           = 1;
420         INIT_LIST_HEAD(&n->gc_list);
421
422         atomic_inc(&tbl->entries);
423 out:
424         return n;
425
426 out_entries:
427         if (!exempt_from_gc)
428                 atomic_dec(&tbl->gc_entries);
429         goto out;
430 }
431
432 static void neigh_get_hash_rnd(u32 *x)
433 {
434         *x = get_random_u32() | 1;
435 }
436
437 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
438 {
439         size_t size = (1 << shift) * sizeof(struct neighbour *);
440         struct neigh_hash_table *ret;
441         struct neighbour __rcu **buckets;
442         int i;
443
444         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
445         if (!ret)
446                 return NULL;
447         if (size <= PAGE_SIZE) {
448                 buckets = kzalloc(size, GFP_ATOMIC);
449         } else {
450                 buckets = (struct neighbour __rcu **)
451                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
452                                            get_order(size));
453                 kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
454         }
455         if (!buckets) {
456                 kfree(ret);
457                 return NULL;
458         }
459         ret->hash_buckets = buckets;
460         ret->hash_shift = shift;
461         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
462                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
463         return ret;
464 }
465
466 static void neigh_hash_free_rcu(struct rcu_head *head)
467 {
468         struct neigh_hash_table *nht = container_of(head,
469                                                     struct neigh_hash_table,
470                                                     rcu);
471         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
472         struct neighbour __rcu **buckets = nht->hash_buckets;
473
474         if (size <= PAGE_SIZE) {
475                 kfree(buckets);
476         } else {
477                 kmemleak_free(buckets);
478                 free_pages((unsigned long)buckets, get_order(size));
479         }
480         kfree(nht);
481 }
482
483 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
484                                                 unsigned long new_shift)
485 {
486         unsigned int i, hash;
487         struct neigh_hash_table *new_nht, *old_nht;
488
489         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
490
491         old_nht = rcu_dereference_protected(tbl->nht,
492                                             lockdep_is_held(&tbl->lock));
493         new_nht = neigh_hash_alloc(new_shift);
494         if (!new_nht)
495                 return old_nht;
496
497         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
498                 struct neighbour *n, *next;
499
500                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
501                                                    lockdep_is_held(&tbl->lock));
502                      n != NULL;
503                      n = next) {
504                         hash = tbl->hash(n->primary_key, n->dev,
505                                          new_nht->hash_rnd);
506
507                         hash >>= (32 - new_nht->hash_shift);
508                         next = rcu_dereference_protected(n->next,
509                                                 lockdep_is_held(&tbl->lock));
510
511                         rcu_assign_pointer(n->next,
512                                            rcu_dereference_protected(
513                                                 new_nht->hash_buckets[hash],
514                                                 lockdep_is_held(&tbl->lock)));
515                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
516                 }
517         }
518
519         rcu_assign_pointer(tbl->nht, new_nht);
520         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
521         return new_nht;
522 }
523
524 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
525                                struct net_device *dev)
526 {
527         struct neighbour *n;
528
529         NEIGH_CACHE_STAT_INC(tbl, lookups);
530
531         rcu_read_lock_bh();
532         n = __neigh_lookup_noref(tbl, pkey, dev);
533         if (n) {
534                 if (!refcount_inc_not_zero(&n->refcnt))
535                         n = NULL;
536                 NEIGH_CACHE_STAT_INC(tbl, hits);
537         }
538
539         rcu_read_unlock_bh();
540         return n;
541 }
542 EXPORT_SYMBOL(neigh_lookup);
543
544 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
545                                      const void *pkey)
546 {
547         struct neighbour *n;
548         unsigned int key_len = tbl->key_len;
549         u32 hash_val;
550         struct neigh_hash_table *nht;
551
552         NEIGH_CACHE_STAT_INC(tbl, lookups);
553
554         rcu_read_lock_bh();
555         nht = rcu_dereference_bh(tbl->nht);
556         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
557
558         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
559              n != NULL;
560              n = rcu_dereference_bh(n->next)) {
561                 if (!memcmp(n->primary_key, pkey, key_len) &&
562                     net_eq(dev_net(n->dev), net)) {
563                         if (!refcount_inc_not_zero(&n->refcnt))
564                                 n = NULL;
565                         NEIGH_CACHE_STAT_INC(tbl, hits);
566                         break;
567                 }
568         }
569
570         rcu_read_unlock_bh();
571         return n;
572 }
573 EXPORT_SYMBOL(neigh_lookup_nodev);
574
575 static struct neighbour *___neigh_create(struct neigh_table *tbl,
576                                          const void *pkey,
577                                          struct net_device *dev,
578                                          bool exempt_from_gc, bool want_ref)
579 {
580         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
581         u32 hash_val;
582         unsigned int key_len = tbl->key_len;
583         int error;
584         struct neigh_hash_table *nht;
585
586         if (!n) {
587                 rc = ERR_PTR(-ENOBUFS);
588                 goto out;
589         }
590
591         memcpy(n->primary_key, pkey, key_len);
592         n->dev = dev;
593         dev_hold(dev);
594
595         /* Protocol specific setup. */
596         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
597                 rc = ERR_PTR(error);
598                 goto out_neigh_release;
599         }
600
601         if (dev->netdev_ops->ndo_neigh_construct) {
602                 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
603                 if (error < 0) {
604                         rc = ERR_PTR(error);
605                         goto out_neigh_release;
606                 }
607         }
608
609         /* Device specific setup. */
610         if (n->parms->neigh_setup &&
611             (error = n->parms->neigh_setup(n)) < 0) {
612                 rc = ERR_PTR(error);
613                 goto out_neigh_release;
614         }
615
616         n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
617
618         write_lock_bh(&tbl->lock);
619         nht = rcu_dereference_protected(tbl->nht,
620                                         lockdep_is_held(&tbl->lock));
621
622         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
623                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
624
625         hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
626
627         if (n->parms->dead) {
628                 rc = ERR_PTR(-EINVAL);
629                 goto out_tbl_unlock;
630         }
631
632         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
633                                             lockdep_is_held(&tbl->lock));
634              n1 != NULL;
635              n1 = rcu_dereference_protected(n1->next,
636                         lockdep_is_held(&tbl->lock))) {
637                 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
638                         if (want_ref)
639                                 neigh_hold(n1);
640                         rc = n1;
641                         goto out_tbl_unlock;
642                 }
643         }
644
645         n->dead = 0;
646         if (!exempt_from_gc)
647                 list_add_tail(&n->gc_list, &n->tbl->gc_list);
648
649         if (want_ref)
650                 neigh_hold(n);
651         rcu_assign_pointer(n->next,
652                            rcu_dereference_protected(nht->hash_buckets[hash_val],
653                                                      lockdep_is_held(&tbl->lock)));
654         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
655         write_unlock_bh(&tbl->lock);
656         neigh_dbg(2, "neigh %p is created\n", n);
657         rc = n;
658 out:
659         return rc;
660 out_tbl_unlock:
661         write_unlock_bh(&tbl->lock);
662 out_neigh_release:
663         neigh_release(n);
664         goto out;
665 }
666
667 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
668                                  struct net_device *dev, bool want_ref)
669 {
670         return ___neigh_create(tbl, pkey, dev, false, want_ref);
671 }
672 EXPORT_SYMBOL(__neigh_create);
673
674 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
675 {
676         u32 hash_val = *(u32 *)(pkey + key_len - 4);
677         hash_val ^= (hash_val >> 16);
678         hash_val ^= hash_val >> 8;
679         hash_val ^= hash_val >> 4;
680         hash_val &= PNEIGH_HASHMASK;
681         return hash_val;
682 }
683
684 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
685                                               struct net *net,
686                                               const void *pkey,
687                                               unsigned int key_len,
688                                               struct net_device *dev)
689 {
690         while (n) {
691                 if (!memcmp(n->key, pkey, key_len) &&
692                     net_eq(pneigh_net(n), net) &&
693                     (n->dev == dev || !n->dev))
694                         return n;
695                 n = n->next;
696         }
697         return NULL;
698 }
699
700 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
701                 struct net *net, const void *pkey, struct net_device *dev)
702 {
703         unsigned int key_len = tbl->key_len;
704         u32 hash_val = pneigh_hash(pkey, key_len);
705
706         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
707                                  net, pkey, key_len, dev);
708 }
709 EXPORT_SYMBOL_GPL(__pneigh_lookup);
710
711 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
712                                     struct net *net, const void *pkey,
713                                     struct net_device *dev, int creat)
714 {
715         struct pneigh_entry *n;
716         unsigned int key_len = tbl->key_len;
717         u32 hash_val = pneigh_hash(pkey, key_len);
718
719         read_lock_bh(&tbl->lock);
720         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
721                               net, pkey, key_len, dev);
722         read_unlock_bh(&tbl->lock);
723
724         if (n || !creat)
725                 goto out;
726
727         ASSERT_RTNL();
728
729         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
730         if (!n)
731                 goto out;
732
733         n->protocol = 0;
734         write_pnet(&n->net, net);
735         memcpy(n->key, pkey, key_len);
736         n->dev = dev;
737         if (dev)
738                 dev_hold(dev);
739
740         if (tbl->pconstructor && tbl->pconstructor(n)) {
741                 if (dev)
742                         dev_put(dev);
743                 kfree(n);
744                 n = NULL;
745                 goto out;
746         }
747
748         write_lock_bh(&tbl->lock);
749         n->next = tbl->phash_buckets[hash_val];
750         tbl->phash_buckets[hash_val] = n;
751         write_unlock_bh(&tbl->lock);
752 out:
753         return n;
754 }
755 EXPORT_SYMBOL(pneigh_lookup);
756
757
758 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
759                   struct net_device *dev)
760 {
761         struct pneigh_entry *n, **np;
762         unsigned int key_len = tbl->key_len;
763         u32 hash_val = pneigh_hash(pkey, key_len);
764
765         write_lock_bh(&tbl->lock);
766         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
767              np = &n->next) {
768                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
769                     net_eq(pneigh_net(n), net)) {
770                         *np = n->next;
771                         write_unlock_bh(&tbl->lock);
772                         if (tbl->pdestructor)
773                                 tbl->pdestructor(n);
774                         if (n->dev)
775                                 dev_put(n->dev);
776                         kfree(n);
777                         return 0;
778                 }
779         }
780         write_unlock_bh(&tbl->lock);
781         return -ENOENT;
782 }
783
784 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
785                                     struct net_device *dev)
786 {
787         struct pneigh_entry *n, **np, *freelist = NULL;
788         u32 h;
789
790         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
791                 np = &tbl->phash_buckets[h];
792                 while ((n = *np) != NULL) {
793                         if (!dev || n->dev == dev) {
794                                 *np = n->next;
795                                 n->next = freelist;
796                                 freelist = n;
797                                 continue;
798                         }
799                         np = &n->next;
800                 }
801         }
802         write_unlock_bh(&tbl->lock);
803         while ((n = freelist)) {
804                 freelist = n->next;
805                 n->next = NULL;
806                 if (tbl->pdestructor)
807                         tbl->pdestructor(n);
808                 if (n->dev)
809                         dev_put(n->dev);
810                 kfree(n);
811         }
812         return -ENOENT;
813 }
814
815 static void neigh_parms_destroy(struct neigh_parms *parms);
816
817 static inline void neigh_parms_put(struct neigh_parms *parms)
818 {
819         if (refcount_dec_and_test(&parms->refcnt))
820                 neigh_parms_destroy(parms);
821 }
822
823 /*
824  *      neighbour must already be out of the table;
825  *
826  */
827 void neigh_destroy(struct neighbour *neigh)
828 {
829         struct net_device *dev = neigh->dev;
830
831         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
832
833         if (!neigh->dead) {
834                 pr_warn("Destroying alive neighbour %p\n", neigh);
835                 dump_stack();
836                 return;
837         }
838
839         if (neigh_del_timer(neigh))
840                 pr_warn("Impossible event\n");
841
842         write_lock_bh(&neigh->lock);
843         __skb_queue_purge(&neigh->arp_queue);
844         write_unlock_bh(&neigh->lock);
845         neigh->arp_queue_len_bytes = 0;
846
847         if (dev->netdev_ops->ndo_neigh_destroy)
848                 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
849
850         dev_put(dev);
851         neigh_parms_put(neigh->parms);
852
853         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
854
855         atomic_dec(&neigh->tbl->entries);
856         kfree_rcu(neigh, rcu);
857 }
858 EXPORT_SYMBOL(neigh_destroy);
859
860 /* Neighbour state is suspicious;
861    disable fast path.
862
863    Called with write_locked neigh.
864  */
865 static void neigh_suspect(struct neighbour *neigh)
866 {
867         neigh_dbg(2, "neigh %p is suspected\n", neigh);
868
869         neigh->output = neigh->ops->output;
870 }
871
872 /* Neighbour state is OK;
873    enable fast path.
874
875    Called with write_locked neigh.
876  */
877 static void neigh_connect(struct neighbour *neigh)
878 {
879         neigh_dbg(2, "neigh %p is connected\n", neigh);
880
881         neigh->output = neigh->ops->connected_output;
882 }
883
/*
 * Periodic garbage collection, run as the tbl->gc_work delayed work item.
 *
 * Under tbl->lock (write, BHs disabled) each run:
 *  - re-randomizes reachable_time of every parms block once more than
 *    300 * HZ jiffies have elapsed since tbl->last_rand;
 *  - if the table holds at least gc_thresh1 entries, walks all hash
 *    buckets and unlinks entries whose refcount is 1 and that are either
 *    NUD_FAILED or unused for longer than GC_STALETIME;
 *  - re-queues itself BASE_REACHABLE_TIME/2 ticks later.
 */
static void neigh_periodic_work(struct work_struct *work)
{
        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
        struct neighbour *n;
        struct neighbour __rcu **np;
        unsigned int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /*
         *      periodically recompute ReachableTime from random function
         */

        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = jiffies;
                list_for_each_entry(p, &tbl->parms_list, list)
                        p->reachable_time =
                                neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
        }

        /* Below the first threshold no entries are collected at all. */
        if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
                goto out;

        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
                /* np always points at the link that leads to the current entry,
                 * so an entry can be unlinked by overwriting *np.
                 */
                np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                lockdep_is_held(&tbl->lock))) != NULL) {
                        unsigned int state;

                        write_lock(&n->lock);

                        /* Permanent, timer-driven and externally learned
                         * entries are never collected here.
                         */
                        state = n->nud_state;
                        if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
                            (n->flags & NTF_EXT_LEARNED)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        /* Never let 'used' lag behind 'confirmed'. */
                        if (time_before(n->used, n->confirmed))
                                n->used = n->confirmed;

                        if (refcount_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
                                /* Unlink; np stays put so the next iteration
                                 * examines the successor.
                                 */
                                *np = n->next;
                                neigh_mark_dead(n);
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
                /*
                 * It's fine to release lock here, even if hash table
                 * grows while we are preempted.
                 */
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
                /* Reload the table pointer after the lock was dropped. */
                nht = rcu_dereference_protected(tbl->nht,
                                                lockdep_is_held(&tbl->lock));
        }
out:
        /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
         * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
         * BASE_REACHABLE_TIME.
         */
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
                              NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
        write_unlock_bh(&tbl->lock);
}
965
966 static __inline__ int neigh_max_probes(struct neighbour *n)
967 {
968         struct neigh_parms *p = n->parms;
969         return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
970                (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
971                 NEIGH_VAR(p, MCAST_PROBES));
972 }
973
974 static void neigh_invalidate(struct neighbour *neigh)
975         __releases(neigh->lock)
976         __acquires(neigh->lock)
977 {
978         struct sk_buff *skb;
979
980         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
981         neigh_dbg(2, "neigh %p is failed\n", neigh);
982         neigh->updated = jiffies;
983
984         /* It is very thin place. report_unreachable is very complicated
985            routine. Particularly, it can hit the same neighbour entry!
986
987            So that, we try to be accurate and avoid dead loop. --ANK
988          */
989         while (neigh->nud_state == NUD_FAILED &&
990                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
991                 write_unlock(&neigh->lock);
992                 neigh->ops->error_report(neigh, skb);
993                 write_lock(&neigh->lock);
994         }
995         __skb_queue_purge(&neigh->arp_queue);
996         neigh->arp_queue_len_bytes = 0;
997 }
998
999 static void neigh_probe(struct neighbour *neigh)
1000         __releases(neigh->lock)
1001 {
1002         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1003         /* keep skb alive even if arp_queue overflows */
1004         if (skb)
1005                 skb = skb_clone(skb, GFP_ATOMIC);
1006         write_unlock(&neigh->lock);
1007         if (neigh->ops->solicit)
1008                 neigh->ops->solicit(neigh, skb);
1009         atomic_inc(&neigh->probes);
1010         consume_skb(skb);
1011 }
1012
/* Called when a timer expires for a neighbour entry.
 *
 * Advances the NUD state machine under neigh->lock:
 *   REACHABLE -> REACHABLE (still confirmed), DELAY (recently used) or STALE
 *   DELAY     -> REACHABLE (confirmed meanwhile) or PROBE
 *   PROBE / INCOMPLETE -> FAILED once neigh_max_probes() is reached,
 *                         otherwise another probe is sent.
 * The timer is re-armed while the entry stays in a NUD_IN_TIMER state.
 * The handler finishes with neigh_release(); presumably that pairs with
 * the reference taken when the timer was armed - confirm against
 * neigh_add_timer().
 */

static void neigh_timer_handler(struct timer_list *t)
{
        unsigned long now, next;
        struct neighbour *neigh = from_timer(neigh, t, timer);
        unsigned int state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        /* Default re-arm interval; each branch below may override it. */
        next = now + HZ;

        /* The state left the timer-driven set before we ran: nothing to do. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        neigh_dbg(2, "neigh %p is still alive\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used +
                                          NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                        neigh_dbg(2, "neigh %p is delayed\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
                } else {
                        neigh_dbg(2, "neigh %p is suspected\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed +
                                   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
                        neigh_dbg(2, "neigh %p is now reachable\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        neigh_dbg(2, "neigh %p is probed\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        notify = 1;
                        next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
        }

        /* Probe budget exhausted: give up and fail the entry. */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
                goto out;
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm closer than HZ/2 from now. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                /* mod_timer() returned 0, so take a reference for the
                 * newly armed timer.
                 */
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() releases neigh->lock itself. */
                neigh_probe(neigh);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh, 0);

        neigh_release(neigh);
}
1100
/*
 * Kick address resolution for @neigh on behalf of an outgoing @skb.
 *
 * Returns 0 when @skb is left with the caller (entry is connected, in
 * DELAY/PROBE, or was just moved from STALE to DELAY). Returns 1 when
 * @skb (if any) was consumed here: either queued on neigh->arp_queue
 * while the entry is NUD_INCOMPLETE, or freed because resolution cannot
 * proceed.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;
        if (neigh->dead)
                goto out_dead;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
                    NEIGH_VAR(neigh->parms, APP_PROBES)) {
                        unsigned long next, now = jiffies;

                        /* Start the probe counter at UCAST_PROBES. */
                        atomic_set(&neigh->probes,
                                   NEIGH_VAR(neigh->parms, UCAST_PROBES));
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
                                         HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* Neither multicast nor application probes are
                         * configured: fail the entry right away.
                         */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                neigh_dbg(2, "neigh %p is delayed\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh, jiffies +
                                NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Make room by dropping the oldest queued packets
                         * until the new one fits within QUEUE_LEN_BYTES
                         * (or the queue is empty).
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        /* neigh_probe() drops neigh->lock itself; either way BHs are
         * re-enabled separately below to complete write_lock_bh().
         */
        if (immediate_probe)
                neigh_probe(neigh);
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;

out_dead:
        /* A dead entry that is STALE still returns 0 and leaves the skb
         * with the caller; any other dead entry gets the skb freed.
         */
        if (neigh->nud_state & NUD_STALE)
                goto out_unlock_bh;
        write_unlock_bh(&neigh->lock);
        kfree_skb(skb);
        return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1178
1179 static void neigh_update_hhs(struct neighbour *neigh)
1180 {
1181         struct hh_cache *hh;
1182         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1183                 = NULL;
1184
1185         if (neigh->dev->header_ops)
1186                 update = neigh->dev->header_ops->cache_update;
1187
1188         if (update) {
1189                 hh = &neigh->hh;
1190                 if (hh->hh_len) {
1191                         write_seqlock_bh(&hh->hh_lock);
1192                         update(hh, neigh->dev, neigh->ha);
1193                         write_sequnlock_bh(&hh->hh_lock);
1194                 }
1195         }
1196 }
1197
1198
1199
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
                                if it is different.
        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
                                lladdr instead of overriding it
                                if it is different.
        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.

        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
                                NTF_ROUTER flag.
        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
                                a router.
   -- nlmsg_pid is passed through to neigh_update_notify().
   -- extack receives an error message on some failure paths.

   Returns 0 on success, -EPERM when a non-administrative update targets
   a NUD_NOARP/NUD_PERMANENT entry or the entry is dead, and -EINVAL when
   no lladdr is supplied and none is cached.

   Caller MUST hold reference count on the entry.
 */

static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
                          u8 new, u32 flags, u32 nlmsg_pid,
                          struct netlink_ext_ack *extack)
{
        bool ext_learn_change = false;
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative updates may touch NOARP/PERMANENT entries. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;
        if (neigh->dead) {
                NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
                goto out;
        }

        ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);

        /* New state is not a valid one: stop the timer, leave the fast
         * path and, when a resolution attempt just failed, flush the
         * queued packets through neigh_invalidate().
         */
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one.
         * From here on "lladdr == neigh->ha" (pointer equality) means
         * "the address does not change".
         */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID)) {
                        NL_SET_ERR_MSG(extack, "No link layer address given");
                        goto out;
                }
                lladdr = neigh->ha;
        }

        /* Update confirmed timestamp for neighbour entry after we
         * received ARP packet even if it doesn't change IP to MAC binding.
         */
        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        /* Address differs but overriding is not allowed. */
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Keep the cached address, but demote the
                                 * entry to STALE.
                                 */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            !(flags & NEIGH_UPDATE_F_ADMIN))
                                new = old;
                }
        }

        /* Update timestamp only once we know we will make a change to the
         * neighbour entry. Otherwise we risk to move the locktime window with
         * noop updates and ignore relevant ARP updates.
         */
        if (new != old || lladdr != neigh->ha)
                neigh->updated = jiffies;

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_PROBE)
                        atomic_set(&neigh->probes, 0);
                /* REACHABLE gets a timer reachable_time ahead; the other
                 * timer-driven states get one set to the current jiffies.
                 */
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
                notify = 1;
        }

        if (lladdr != neigh->ha) {
                /* Publish the new hardware address under ha_lock so that
                 * the read_seqbegin()/read_seqretry() readers retry.
                 */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                /* Not connected: backdate 'confirmed' by twice
                 * BASE_REACHABLE_TIME.
                 */
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        /* The entry just became valid: transmit the packets that were
         * queued while it was unresolved.
         */
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        /* The output path runs without neigh->lock held. */
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();

                        /* Why not just use 'neigh' as-is?  The problem is that
                         * things such as shaper, eql, and sch_teql can end up
                         * using alternative, different, neigh objects to output
                         * the packet in the output path.  So what we need to do
                         * here is re-lookup the top-level neigh in the path so
                         * we can reinject the packet there.
                         */
                        n2 = NULL;
                        if (dst) {
                                n2 = dst_neigh_lookup_skb(dst, skb);
                                if (n2)
                                        n1 = n2;
                        }
                        n1->output(n1, skb);
                        if (n2)
                                neigh_release(n2);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                __skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        if (update_isrouter)
                neigh_update_is_router(neigh, flags, &notify);
        write_unlock_bh(&neigh->lock);

        /* Re-evaluate gc-list membership when the PERMANENT bit flipped or
         * the externally-learned status changed.
         */
        if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
                neigh_update_gc_list(neigh);

        if (notify)
                neigh_update_notify(neigh, nlmsg_pid);

        return err;
}
1398
1399 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1400                  u32 flags, u32 nlmsg_pid)
1401 {
1402         return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1403 }
1404 EXPORT_SYMBOL(neigh_update);
1405
1406 /* Update the neigh to listen temporarily for probe responses, even if it is
1407  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1408  */
1409 void __neigh_set_probe_once(struct neighbour *neigh)
1410 {
1411         if (neigh->dead)
1412                 return;
1413         neigh->updated = jiffies;
1414         if (!(neigh->nud_state & NUD_FAILED))
1415                 return;
1416         neigh->nud_state = NUD_INCOMPLETE;
1417         atomic_set(&neigh->probes, neigh_max_probes(neigh));
1418         neigh_add_timer(neigh,
1419                         jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1420 }
1421 EXPORT_SYMBOL(__neigh_set_probe_once);
1422
1423 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1424                                  u8 *lladdr, void *saddr,
1425                                  struct net_device *dev)
1426 {
1427         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1428                                                  lladdr || !dev->addr_len);
1429         if (neigh)
1430                 neigh_update(neigh, lladdr, NUD_STALE,
1431                              NEIGH_UPDATE_F_OVERRIDE, 0);
1432         return neigh;
1433 }
1434 EXPORT_SYMBOL(neigh_event_ns);
1435
1436 /* called with read_lock_bh(&n->lock); */
1437 static void neigh_hh_init(struct neighbour *n)
1438 {
1439         struct net_device *dev = n->dev;
1440         __be16 prot = n->tbl->protocol;
1441         struct hh_cache *hh = &n->hh;
1442
1443         write_lock_bh(&n->lock);
1444
1445         /* Only one thread can come in here and initialize the
1446          * hh_cache entry.
1447          */
1448         if (!hh->hh_len)
1449                 dev->header_ops->cache(n, hh, prot);
1450
1451         write_unlock_bh(&n->lock);
1452 }
1453
1454 /* Slow and careful. */
1455
1456 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1457 {
1458         int rc = 0;
1459
1460         if (!neigh_event_send(neigh, skb)) {
1461                 int err;
1462                 struct net_device *dev = neigh->dev;
1463                 unsigned int seq;
1464
1465                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1466                         neigh_hh_init(neigh);
1467
1468                 do {
1469                         __skb_pull(skb, skb_network_offset(skb));
1470                         seq = read_seqbegin(&neigh->ha_lock);
1471                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1472                                               neigh->ha, NULL, skb->len);
1473                 } while (read_seqretry(&neigh->ha_lock, seq));
1474
1475                 if (err >= 0)
1476                         rc = dev_queue_xmit(skb);
1477                 else
1478                         goto out_kfree_skb;
1479         }
1480 out:
1481         return rc;
1482 out_kfree_skb:
1483         rc = -EINVAL;
1484         kfree_skb(skb);
1485         goto out;
1486 }
1487 EXPORT_SYMBOL(neigh_resolve_output);
1488
1489 /* As fast as possible without hh cache */
1490
1491 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1492 {
1493         struct net_device *dev = neigh->dev;
1494         unsigned int seq;
1495         int err;
1496
1497         do {
1498                 __skb_pull(skb, skb_network_offset(skb));
1499                 seq = read_seqbegin(&neigh->ha_lock);
1500                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1501                                       neigh->ha, NULL, skb->len);
1502         } while (read_seqretry(&neigh->ha_lock, seq));
1503
1504         if (err >= 0)
1505                 err = dev_queue_xmit(skb);
1506         else {
1507                 err = -EINVAL;
1508                 kfree_skb(skb);
1509         }
1510         return err;
1511 }
1512 EXPORT_SYMBOL(neigh_connected_output);
1513
/* Output method that hands @skb straight to dev_queue_xmit(); @neigh is
 * not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;

        rc = dev_queue_xmit(skb);
        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1519
1520 static void neigh_proxy_process(struct timer_list *t)
1521 {
1522         struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1523         long sched_next = 0;
1524         unsigned long now = jiffies;
1525         struct sk_buff *skb, *n;
1526
1527         spin_lock(&tbl->proxy_queue.lock);
1528
1529         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1530                 long tdif = NEIGH_CB(skb)->sched_next - now;
1531
1532                 if (tdif <= 0) {
1533                         struct net_device *dev = skb->dev;
1534
1535                         __skb_unlink(skb, &tbl->proxy_queue);
1536                         if (tbl->proxy_redo && netif_running(dev)) {
1537                                 rcu_read_lock();
1538                                 tbl->proxy_redo(skb);
1539                                 rcu_read_unlock();
1540                         } else {
1541                                 kfree_skb(skb);
1542                         }
1543
1544                         dev_put(dev);
1545                 } else if (!sched_next || tdif < sched_next)
1546                         sched_next = tdif;
1547         }
1548         del_timer(&tbl->proxy_timer);
1549         if (sched_next)
1550                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1551         spin_unlock(&tbl->proxy_queue.lock);
1552 }
1553
1554 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1555                     struct sk_buff *skb)
1556 {
1557         unsigned long now = jiffies;
1558
1559         unsigned long sched_next = now + (prandom_u32() %
1560                                           NEIGH_VAR(p, PROXY_DELAY));
1561
1562         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1563                 kfree_skb(skb);
1564                 return;
1565         }
1566
1567         NEIGH_CB(skb)->sched_next = sched_next;
1568         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1569
1570         spin_lock(&tbl->proxy_queue.lock);
1571         if (del_timer(&tbl->proxy_timer)) {
1572                 if (time_before(tbl->proxy_timer.expires, sched_next))
1573                         sched_next = tbl->proxy_timer.expires;
1574         }
1575         skb_dst_drop(skb);
1576         dev_hold(skb->dev);
1577         __skb_queue_tail(&tbl->proxy_queue, skb);
1578         mod_timer(&tbl->proxy_timer, sched_next);
1579         spin_unlock(&tbl->proxy_queue.lock);
1580 }
1581 EXPORT_SYMBOL(pneigh_enqueue);
1582
1583 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1584                                                       struct net *net, int ifindex)
1585 {
1586         struct neigh_parms *p;
1587
1588         list_for_each_entry(p, &tbl->parms_list, list) {
1589                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1590                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1591                         return p;
1592         }
1593
1594         return NULL;
1595 }
1596
1597 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1598                                       struct neigh_table *tbl)
1599 {
1600         struct neigh_parms *p;
1601         struct net *net = dev_net(dev);
1602         const struct net_device_ops *ops = dev->netdev_ops;
1603
1604         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1605         if (p) {
1606                 p->tbl            = tbl;
1607                 refcount_set(&p->refcnt, 1);
1608                 p->reachable_time =
1609                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1610                 dev_hold(dev);
1611                 p->dev = dev;
1612                 write_pnet(&p->net, net);
1613                 p->sysctl_table = NULL;
1614
1615                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1616                         dev_put(dev);
1617                         kfree(p);
1618                         return NULL;
1619                 }
1620
1621                 write_lock_bh(&tbl->lock);
1622                 list_add(&p->list, &tbl->parms.list);
1623                 write_unlock_bh(&tbl->lock);
1624
1625                 neigh_parms_data_state_cleanall(p);
1626         }
1627         return p;
1628 }
1629 EXPORT_SYMBOL(neigh_parms_alloc);
1630
1631 static void neigh_rcu_free_parms(struct rcu_head *head)
1632 {
1633         struct neigh_parms *parms =
1634                 container_of(head, struct neigh_parms, rcu_head);
1635
1636         neigh_parms_put(parms);
1637 }
1638
1639 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1640 {
1641         if (!parms || parms == &tbl->parms)
1642                 return;
1643         write_lock_bh(&tbl->lock);
1644         list_del(&parms->list);
1645         parms->dead = 1;
1646         write_unlock_bh(&tbl->lock);
1647         if (parms->dev)
1648                 dev_put(parms->dev);
1649         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1650 }
1651 EXPORT_SYMBOL(neigh_parms_release);
1652
/* Free a parms block; invoked from neigh_parms_put() once the last
 * reference has been dropped.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        kfree(parms);
}
1657
/* Lock class key given to skb_queue_head_init_class() for each table's
 * proxy_queue in neigh_table_init().
 */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Registered neighbour tables, indexed by the 'index' argument of
 * neigh_table_init() / neigh_table_clear().
 */
static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1661
/*
 * Initialize neighbour table @tbl and register it in slot @index of
 * neigh_tables[].
 *
 * Sets up the parms and gc lists, per-CPU statistics, the /proc stat
 * entry (CONFIG_PROC_FS), the neighbour and proxy hash tables, the
 * periodic gc work and the proxy timer/queue. Any allocation or proc
 * registration failure is fatal: the function panics instead of
 * returning an error.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        INIT_LIST_HEAD(&tbl->parms_list);
        INIT_LIST_HEAD(&tbl->gc_list);
        /* The table's built-in default parms are the first list member. */
        list_add(&tbl->parms.list, &tbl->parms_list);
        write_pnet(&tbl->parms.net, &init_net);
        refcount_set(&tbl->parms.refcnt, 1);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_ops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        /* Initial neighbour hash table, allocated with shift 3. */
        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        /* Derive the entry size from the key length unless the table
         * supplied one; a supplied size is expected to be aligned.
         */
        if (!tbl->entry_size)
                tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
                                        tbl->key_len, NEIGH_PRIV_ALIGN);
        else
                WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

        rwlock_init(&tbl->lock);
        /* First gc run is scheduled reachable_time ticks from now. */
        INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
        queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
                        tbl->parms.reachable_time);
        timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;

        neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);
1713
1714 int neigh_table_clear(int index, struct neigh_table *tbl)
1715 {
1716         neigh_tables[index] = NULL;
1717         /* It is not clean... Fix it to unload IPv6 module safely */
1718         cancel_delayed_work_sync(&tbl->gc_work);
1719         del_timer_sync(&tbl->proxy_timer);
1720         pneigh_queue_purge(&tbl->proxy_queue);
1721         neigh_ifdown(tbl, NULL);
1722         if (atomic_read(&tbl->entries))
1723                 pr_crit("neighbour leakage\n");
1724
1725         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1726                  neigh_hash_free_rcu);
1727         tbl->nht = NULL;
1728
1729         kfree(tbl->phash_buckets);
1730         tbl->phash_buckets = NULL;
1731
1732         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1733
1734         free_percpu(tbl->stats);
1735         tbl->stats = NULL;
1736
1737         return 0;
1738 }
1739 EXPORT_SYMBOL(neigh_table_clear);
1740
1741 static struct neigh_table *neigh_find_table(int family)
1742 {
1743         struct neigh_table *tbl = NULL;
1744
1745         switch (family) {
1746         case AF_INET:
1747                 tbl = neigh_tables[NEIGH_ARP_TABLE];
1748                 break;
1749         case AF_INET6:
1750                 tbl = neigh_tables[NEIGH_ND_TABLE];
1751                 break;
1752         case AF_DECnet:
1753                 tbl = neigh_tables[NEIGH_DN_TABLE];
1754                 break;
1755         }
1756
1757         return tbl;
1758 }
1759
1760 const struct nla_policy nda_policy[NDA_MAX+1] = {
1761         [NDA_DST]               = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1762         [NDA_LLADDR]            = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1763         [NDA_CACHEINFO]         = { .len = sizeof(struct nda_cacheinfo) },
1764         [NDA_PROBES]            = { .type = NLA_U32 },
1765         [NDA_VLAN]              = { .type = NLA_U16 },
1766         [NDA_PORT]              = { .type = NLA_U16 },
1767         [NDA_VNI]               = { .type = NLA_U32 },
1768         [NDA_IFINDEX]           = { .type = NLA_U32 },
1769         [NDA_MASTER]            = { .type = NLA_U32 },
1770         [NDA_PROTOCOL]          = { .type = NLA_U8 },
1771 };
1772
1773 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1774                         struct netlink_ext_ack *extack)
1775 {
1776         struct net *net = sock_net(skb->sk);
1777         struct ndmsg *ndm;
1778         struct nlattr *dst_attr;
1779         struct neigh_table *tbl;
1780         struct neighbour *neigh;
1781         struct net_device *dev = NULL;
1782         int err = -EINVAL;
1783
1784         ASSERT_RTNL();
1785         if (nlmsg_len(nlh) < sizeof(*ndm))
1786                 goto out;
1787
1788         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1789         if (!dst_attr) {
1790                 NL_SET_ERR_MSG(extack, "Network address not specified");
1791                 goto out;
1792         }
1793
1794         ndm = nlmsg_data(nlh);
1795         if (ndm->ndm_ifindex) {
1796                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1797                 if (dev == NULL) {
1798                         err = -ENODEV;
1799                         goto out;
1800                 }
1801         }
1802
1803         tbl = neigh_find_table(ndm->ndm_family);
1804         if (tbl == NULL)
1805                 return -EAFNOSUPPORT;
1806
1807         if (nla_len(dst_attr) < (int)tbl->key_len) {
1808                 NL_SET_ERR_MSG(extack, "Invalid network address");
1809                 goto out;
1810         }
1811
1812         if (ndm->ndm_flags & NTF_PROXY) {
1813                 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1814                 goto out;
1815         }
1816
1817         if (dev == NULL)
1818                 goto out;
1819
1820         neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1821         if (neigh == NULL) {
1822                 err = -ENOENT;
1823                 goto out;
1824         }
1825
1826         err = __neigh_update(neigh, NULL, NUD_FAILED,
1827                              NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1828                              NETLINK_CB(skb).portid, extack);
1829         write_lock_bh(&tbl->lock);
1830         neigh_release(neigh);
1831         neigh_remove_one(neigh, tbl);
1832         write_unlock_bh(&tbl->lock);
1833
1834 out:
1835         return err;
1836 }
1837
1838 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1839                      struct netlink_ext_ack *extack)
1840 {
1841         int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1842                 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1843         struct net *net = sock_net(skb->sk);
1844         struct ndmsg *ndm;
1845         struct nlattr *tb[NDA_MAX+1];
1846         struct neigh_table *tbl;
1847         struct net_device *dev = NULL;
1848         struct neighbour *neigh;
1849         void *dst, *lladdr;
1850         u8 protocol = 0;
1851         int err;
1852
1853         ASSERT_RTNL();
1854         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
1855         if (err < 0)
1856                 goto out;
1857
1858         err = -EINVAL;
1859         if (!tb[NDA_DST]) {
1860                 NL_SET_ERR_MSG(extack, "Network address not specified");
1861                 goto out;
1862         }
1863
1864         ndm = nlmsg_data(nlh);
1865         if (ndm->ndm_ifindex) {
1866                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1867                 if (dev == NULL) {
1868                         err = -ENODEV;
1869                         goto out;
1870                 }
1871
1872                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1873                         NL_SET_ERR_MSG(extack, "Invalid link address");
1874                         goto out;
1875                 }
1876         }
1877
1878         tbl = neigh_find_table(ndm->ndm_family);
1879         if (tbl == NULL)
1880                 return -EAFNOSUPPORT;
1881
1882         if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1883                 NL_SET_ERR_MSG(extack, "Invalid network address");
1884                 goto out;
1885         }
1886
1887         dst = nla_data(tb[NDA_DST]);
1888         lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1889
1890         if (tb[NDA_PROTOCOL])
1891                 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
1892
1893         if (ndm->ndm_flags & NTF_PROXY) {
1894                 struct pneigh_entry *pn;
1895
1896                 err = -ENOBUFS;
1897                 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1898                 if (pn) {
1899                         pn->flags = ndm->ndm_flags;
1900                         if (protocol)
1901                                 pn->protocol = protocol;
1902                         err = 0;
1903                 }
1904                 goto out;
1905         }
1906
1907         if (!dev) {
1908                 NL_SET_ERR_MSG(extack, "Device not specified");
1909                 goto out;
1910         }
1911
1912         neigh = neigh_lookup(tbl, dst, dev);
1913         if (neigh == NULL) {
1914                 bool exempt_from_gc;
1915
1916                 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1917                         err = -ENOENT;
1918                         goto out;
1919                 }
1920
1921                 exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
1922                                  ndm->ndm_flags & NTF_EXT_LEARNED;
1923                 neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
1924                 if (IS_ERR(neigh)) {
1925                         err = PTR_ERR(neigh);
1926                         goto out;
1927                 }
1928         } else {
1929                 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1930                         err = -EEXIST;
1931                         neigh_release(neigh);
1932                         goto out;
1933                 }
1934
1935                 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1936                         flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1937                                    NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1938         }
1939
1940         if (ndm->ndm_flags & NTF_EXT_LEARNED)
1941                 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1942
1943         if (ndm->ndm_flags & NTF_ROUTER)
1944                 flags |= NEIGH_UPDATE_F_ISROUTER;
1945
1946         if (ndm->ndm_flags & NTF_USE) {
1947                 neigh_event_send(neigh, NULL);
1948                 err = 0;
1949         } else
1950                 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1951                                      NETLINK_CB(skb).portid, extack);
1952
1953         if (protocol)
1954                 neigh->protocol = protocol;
1955
1956         neigh_release(neigh);
1957
1958 out:
1959         return err;
1960 }
1961
1962 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1963 {
1964         struct nlattr *nest;
1965
1966         nest = nla_nest_start(skb, NDTA_PARMS);
1967         if (nest == NULL)
1968                 return -ENOBUFS;
1969
1970         if ((parms->dev &&
1971              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1972             nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1973             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1974                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1975             /* approximative value for deprecated QUEUE_LEN (in packets) */
1976             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1977                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1978             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1979             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1980             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1981                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1982             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1983                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1984             nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1985                         NEIGH_VAR(parms, MCAST_REPROBES)) ||
1986             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1987                           NDTPA_PAD) ||
1988             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1989                           NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1990             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1991                           NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1992             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1993                           NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1994             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1995                           NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1996             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1997                           NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1998             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1999                           NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2000             nla_put_msecs(skb, NDTPA_LOCKTIME,
2001                           NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
2002                 goto nla_put_failure;
2003         return nla_nest_end(skb, nest);
2004
2005 nla_put_failure:
2006         nla_nest_cancel(skb, nest);
2007         return -EMSGSIZE;
2008 }
2009
2010 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2011                               u32 pid, u32 seq, int type, int flags)
2012 {
2013         struct nlmsghdr *nlh;
2014         struct ndtmsg *ndtmsg;
2015
2016         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2017         if (nlh == NULL)
2018                 return -EMSGSIZE;
2019
2020         ndtmsg = nlmsg_data(nlh);
2021
2022         read_lock_bh(&tbl->lock);
2023         ndtmsg->ndtm_family = tbl->family;
2024         ndtmsg->ndtm_pad1   = 0;
2025         ndtmsg->ndtm_pad2   = 0;
2026
2027         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2028             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
2029             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
2030             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
2031             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
2032                 goto nla_put_failure;
2033         {
2034                 unsigned long now = jiffies;
2035                 unsigned int flush_delta = now - tbl->last_flush;
2036                 unsigned int rand_delta = now - tbl->last_rand;
2037                 struct neigh_hash_table *nht;
2038                 struct ndt_config ndc = {
2039                         .ndtc_key_len           = tbl->key_len,
2040                         .ndtc_entry_size        = tbl->entry_size,
2041                         .ndtc_entries           = atomic_read(&tbl->entries),
2042                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
2043                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
2044                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
2045                 };
2046
2047                 rcu_read_lock_bh();
2048                 nht = rcu_dereference_bh(tbl->nht);
2049                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2050                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2051                 rcu_read_unlock_bh();
2052
2053                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2054                         goto nla_put_failure;
2055         }
2056
2057         {
2058                 int cpu;
2059                 struct ndt_stats ndst;
2060
2061                 memset(&ndst, 0, sizeof(ndst));
2062
2063                 for_each_possible_cpu(cpu) {
2064                         struct neigh_statistics *st;
2065
2066                         st = per_cpu_ptr(tbl->stats, cpu);
2067                         ndst.ndts_allocs                += st->allocs;
2068                         ndst.ndts_destroys              += st->destroys;
2069                         ndst.ndts_hash_grows            += st->hash_grows;
2070                         ndst.ndts_res_failed            += st->res_failed;
2071                         ndst.ndts_lookups               += st->lookups;
2072                         ndst.ndts_hits                  += st->hits;
2073                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
2074                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
2075                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
2076                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
2077                         ndst.ndts_table_fulls           += st->table_fulls;
2078                 }
2079
2080                 if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2081                                   NDTA_PAD))
2082                         goto nla_put_failure;
2083         }
2084
2085         BUG_ON(tbl->parms.dev);
2086         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2087                 goto nla_put_failure;
2088
2089         read_unlock_bh(&tbl->lock);
2090         nlmsg_end(skb, nlh);
2091         return 0;
2092
2093 nla_put_failure:
2094         read_unlock_bh(&tbl->lock);
2095         nlmsg_cancel(skb, nlh);
2096         return -EMSGSIZE;
2097 }
2098
2099 static int neightbl_fill_param_info(struct sk_buff *skb,
2100                                     struct neigh_table *tbl,
2101                                     struct neigh_parms *parms,
2102                                     u32 pid, u32 seq, int type,
2103                                     unsigned int flags)
2104 {
2105         struct ndtmsg *ndtmsg;
2106         struct nlmsghdr *nlh;
2107
2108         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2109         if (nlh == NULL)
2110                 return -EMSGSIZE;
2111
2112         ndtmsg = nlmsg_data(nlh);
2113
2114         read_lock_bh(&tbl->lock);
2115         ndtmsg->ndtm_family = tbl->family;
2116         ndtmsg->ndtm_pad1   = 0;
2117         ndtmsg->ndtm_pad2   = 0;
2118
2119         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2120             neightbl_fill_parms(skb, parms) < 0)
2121                 goto errout;
2122
2123         read_unlock_bh(&tbl->lock);
2124         nlmsg_end(skb, nlh);
2125         return 0;
2126 errout:
2127         read_unlock_bh(&tbl->lock);
2128         nlmsg_cancel(skb, nlh);
2129         return -EMSGSIZE;
2130 }
2131
2132 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2133         [NDTA_NAME]             = { .type = NLA_STRING },
2134         [NDTA_THRESH1]          = { .type = NLA_U32 },
2135         [NDTA_THRESH2]          = { .type = NLA_U32 },
2136         [NDTA_THRESH3]          = { .type = NLA_U32 },
2137         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
2138         [NDTA_PARMS]            = { .type = NLA_NESTED },
2139 };
2140
2141 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2142         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
2143         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
2144         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
2145         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
2146         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
2147         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
2148         [NDTPA_MCAST_REPROBES]          = { .type = NLA_U32 },
2149         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
2150         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
2151         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
2152         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
2153         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
2154         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
2155         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
2156 };
2157
2158 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2159                         struct netlink_ext_ack *extack)
2160 {
2161         struct net *net = sock_net(skb->sk);
2162         struct neigh_table *tbl;
2163         struct ndtmsg *ndtmsg;
2164         struct nlattr *tb[NDTA_MAX+1];
2165         bool found = false;
2166         int err, tidx;
2167
2168         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2169                           nl_neightbl_policy, extack);
2170         if (err < 0)
2171                 goto errout;
2172
2173         if (tb[NDTA_NAME] == NULL) {
2174                 err = -EINVAL;
2175                 goto errout;
2176         }
2177
2178         ndtmsg = nlmsg_data(nlh);
2179
2180         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2181                 tbl = neigh_tables[tidx];
2182                 if (!tbl)
2183                         continue;
2184                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2185                         continue;
2186                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2187                         found = true;
2188                         break;
2189                 }
2190         }
2191
2192         if (!found)
2193                 return -ENOENT;
2194
2195         /*
2196          * We acquire tbl->lock to be nice to the periodic timers and
2197          * make sure they always see a consistent set of values.
2198          */
2199         write_lock_bh(&tbl->lock);
2200
2201         if (tb[NDTA_PARMS]) {
2202                 struct nlattr *tbp[NDTPA_MAX+1];
2203                 struct neigh_parms *p;
2204                 int i, ifindex = 0;
2205
2206                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2207                                        nl_ntbl_parm_policy, extack);
2208                 if (err < 0)
2209                         goto errout_tbl_lock;
2210
2211                 if (tbp[NDTPA_IFINDEX])
2212                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2213
2214                 p = lookup_neigh_parms(tbl, net, ifindex);
2215                 if (p == NULL) {
2216                         err = -ENOENT;
2217                         goto errout_tbl_lock;
2218                 }
2219
2220                 for (i = 1; i <= NDTPA_MAX; i++) {
2221                         if (tbp[i] == NULL)
2222                                 continue;
2223
2224                         switch (i) {
2225                         case NDTPA_QUEUE_LEN:
2226                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2227                                               nla_get_u32(tbp[i]) *
2228                                               SKB_TRUESIZE(ETH_FRAME_LEN));
2229                                 break;
2230                         case NDTPA_QUEUE_LENBYTES:
2231                                 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2232                                               nla_get_u32(tbp[i]));
2233                                 break;
2234                         case NDTPA_PROXY_QLEN:
2235                                 NEIGH_VAR_SET(p, PROXY_QLEN,
2236                                               nla_get_u32(tbp[i]));
2237                                 break;
2238                         case NDTPA_APP_PROBES:
2239                                 NEIGH_VAR_SET(p, APP_PROBES,
2240                                               nla_get_u32(tbp[i]));
2241                                 break;
2242                         case NDTPA_UCAST_PROBES:
2243                                 NEIGH_VAR_SET(p, UCAST_PROBES,
2244                                               nla_get_u32(tbp[i]));
2245                                 break;
2246                         case NDTPA_MCAST_PROBES:
2247                                 NEIGH_VAR_SET(p, MCAST_PROBES,
2248                                               nla_get_u32(tbp[i]));
2249                                 break;
2250                         case NDTPA_MCAST_REPROBES:
2251                                 NEIGH_VAR_SET(p, MCAST_REPROBES,
2252                                               nla_get_u32(tbp[i]));
2253                                 break;
2254                         case NDTPA_BASE_REACHABLE_TIME:
2255                                 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2256                                               nla_get_msecs(tbp[i]));
2257                                 /* update reachable_time as well, otherwise, the change will
2258                                  * only be effective after the next time neigh_periodic_work
2259                                  * decides to recompute it (can be multiple minutes)
2260                                  */
2261                                 p->reachable_time =
2262                                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2263                                 break;
2264                         case NDTPA_GC_STALETIME:
2265                                 NEIGH_VAR_SET(p, GC_STALETIME,
2266                                               nla_get_msecs(tbp[i]));
2267                                 break;
2268                         case NDTPA_DELAY_PROBE_TIME:
2269                                 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2270                                               nla_get_msecs(tbp[i]));
2271                                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2272                                 break;
2273                         case NDTPA_RETRANS_TIME:
2274                                 NEIGH_VAR_SET(p, RETRANS_TIME,
2275                                               nla_get_msecs(tbp[i]));
2276                                 break;
2277                         case NDTPA_ANYCAST_DELAY:
2278                                 NEIGH_VAR_SET(p, ANYCAST_DELAY,
2279                                               nla_get_msecs(tbp[i]));
2280                                 break;
2281                         case NDTPA_PROXY_DELAY:
2282                                 NEIGH_VAR_SET(p, PROXY_DELAY,
2283                                               nla_get_msecs(tbp[i]));
2284                                 break;
2285                         case NDTPA_LOCKTIME:
2286                                 NEIGH_VAR_SET(p, LOCKTIME,
2287                                               nla_get_msecs(tbp[i]));
2288                                 break;
2289                         }
2290                 }
2291         }
2292
2293         err = -ENOENT;
2294         if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2295              tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2296             !net_eq(net, &init_net))
2297                 goto errout_tbl_lock;
2298
2299         if (tb[NDTA_THRESH1])
2300                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2301
2302         if (tb[NDTA_THRESH2])
2303                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2304
2305         if (tb[NDTA_THRESH3])
2306                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2307
2308         if (tb[NDTA_GC_INTERVAL])
2309                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2310
2311         err = 0;
2312
2313 errout_tbl_lock:
2314         write_unlock_bh(&tbl->lock);
2315 errout:
2316         return err;
2317 }
2318
2319 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2320                                     struct netlink_ext_ack *extack)
2321 {
2322         struct ndtmsg *ndtm;
2323
2324         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2325                 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2326                 return -EINVAL;
2327         }
2328
2329         ndtm = nlmsg_data(nlh);
2330         if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2331                 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2332                 return -EINVAL;
2333         }
2334
2335         if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2336                 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2337                 return -EINVAL;
2338         }
2339
2340         return 0;
2341 }
2342
2343 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2344 {
2345         const struct nlmsghdr *nlh = cb->nlh;
2346         struct net *net = sock_net(skb->sk);
2347         int family, tidx, nidx = 0;
2348         int tbl_skip = cb->args[0];
2349         int neigh_skip = cb->args[1];
2350         struct neigh_table *tbl;
2351
2352         if (cb->strict_check) {
2353                 int err = neightbl_valid_dump_info(nlh, cb->extack);
2354
2355                 if (err < 0)
2356                         return err;
2357         }
2358
2359         family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2360
2361         for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2362                 struct neigh_parms *p;
2363
2364                 tbl = neigh_tables[tidx];
2365                 if (!tbl)
2366                         continue;
2367
2368                 if (tidx < tbl_skip || (family && tbl->family != family))
2369                         continue;
2370
2371                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2372                                        nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2373                                        NLM_F_MULTI) < 0)
2374                         break;
2375
2376                 nidx = 0;
2377                 p = list_next_entry(&tbl->parms, list);
2378                 list_for_each_entry_from(p, &tbl->parms_list, list) {
2379                         if (!net_eq(neigh_parms_net(p), net))
2380                                 continue;
2381
2382                         if (nidx < neigh_skip)
2383                                 goto next;
2384
2385                         if (neightbl_fill_param_info(skb, tbl, p,
2386                                                      NETLINK_CB(cb->skb).portid,
2387                                                      nlh->nlmsg_seq,
2388                                                      RTM_NEWNEIGHTBL,
2389                                                      NLM_F_MULTI) < 0)
2390                                 goto out;
2391                 next:
2392                         nidx++;
2393                 }
2394
2395                 neigh_skip = 0;
2396         }
2397 out:
2398         cb->args[0] = tidx;
2399         cb->args[1] = nidx;
2400
2401         return skb->len;
2402 }
2403
/*
 * Append one neighbour entry to @skb as a netlink message of type @type.
 *
 * The message is a struct ndmsg header followed by NDA_DST, NDA_LLADDR
 * (only while the entry is NUD_VALID), NDA_PROBES, NDA_CACHEINFO and,
 * when neigh->protocol is non-zero, NDA_PROTOCOL.
 *
 * Returns 0 on success.  Returns -EMSGSIZE if the header does not fit, or
 * if any attribute does not fit, in which case the partially built message
 * is cancelled before returning.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
                           u32 pid, u32 seq, int type, unsigned int flags)
{
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family  = neigh->ops->family;
        ndm->ndm_pad1    = 0;
        ndm->ndm_pad2    = 0;
        ndm->ndm_flags   = neigh->flags;
        ndm->ndm_type    = neigh->type;
        ndm->ndm_ifindex = neigh->dev->ifindex;

        if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
                goto nla_put_failure;

        /* State, link-layer address and timestamps are read under neigh->lock. */
        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
        if (neigh->nud_state & NUD_VALID) {
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, neigh, neigh->dev);
                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
                        /* Drop the lock before taking the common error exit. */
                        read_unlock_bh(&neigh->lock);
                        goto nla_put_failure;
                }
        }

        /* Ages are reported relative to the 'now' sampled at function entry. */
        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
        /* Reported count is one less than the raw refcount. */
        ci.ndm_refcnt    = refcount_read(&neigh->refcnt) - 1;
        read_unlock_bh(&neigh->lock);

        if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
            nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
                goto nla_put_failure;

        /* A zero protocol is not reported at all. */
        if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
                goto nla_put_failure;

        nlmsg_end(skb, nlh);
        return 0;

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2459
2460 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2461                             u32 pid, u32 seq, int type, unsigned int flags,
2462                             struct neigh_table *tbl)
2463 {
2464         struct nlmsghdr *nlh;
2465         struct ndmsg *ndm;
2466
2467         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2468         if (nlh == NULL)
2469                 return -EMSGSIZE;
2470
2471         ndm = nlmsg_data(nlh);
2472         ndm->ndm_family  = tbl->family;
2473         ndm->ndm_pad1    = 0;
2474         ndm->ndm_pad2    = 0;
2475         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2476         ndm->ndm_type    = RTN_UNICAST;
2477         ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2478         ndm->ndm_state   = NUD_NONE;
2479
2480         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2481                 goto nla_put_failure;
2482
2483         if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2484                 goto nla_put_failure;
2485
2486         nlmsg_end(skb, nlh);
2487         return 0;
2488
2489 nla_put_failure:
2490         nlmsg_cancel(skb, nlh);
2491         return -EMSGSIZE;
2492 }
2493
/*
 * Announce a change to @neigh: first through the netevent notifier chain
 * (NETEVENT_NEIGH_UPDATE), then as an RTM_NEWNEIGH message built by
 * __neigh_notify() and tagged with @nlmsg_pid.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2499
2500 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2501 {
2502         struct net_device *master;
2503
2504         if (!master_idx)
2505                 return false;
2506
2507         master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2508         if (!master || master->ifindex != master_idx)
2509                 return true;
2510
2511         return false;
2512 }
2513
2514 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2515 {
2516         if (filter_idx && (!dev || dev->ifindex != filter_idx))
2517                 return true;
2518
2519         return false;
2520 }
2521
/* Optional filters for a neighbour dump; a zero index disables that filter. */
struct neigh_dump_filter {
        int master_idx; /* keep only entries whose device's master has this ifindex */
        int dev_idx;    /* keep only entries on the device with this ifindex */
};
2526
/*
 * Dump the neighbour entries of @tbl into @skb, resuming from the cursor
 * saved in @cb: cb->args[1] is the hash bucket and cb->args[2] the index
 * inside that bucket.
 *
 * Entries in other network namespaces, or rejected by @filter, are counted
 * but not emitted.  The walk runs under rcu_read_lock_bh().
 *
 * Returns skb->len when every bucket was walked, or -1 when an entry did
 * not fit.  In both cases the cursor is written back to @cb; on -1 it
 * points at the entry that failed.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb,
                            struct neigh_dump_filter *filter)
{
        struct net *net = sock_net(skb->sk);
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
        struct neigh_hash_table *nht;
        unsigned int flags = NLM_F_MULTI;

        /* Flag the reply as filtered when any filter is active. */
        if (filter->dev_idx || filter->master_idx)
                flags |= NLM_F_DUMP_FILTERED;

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        for (h = s_h; h < (1 << nht->hash_shift); h++) {
                /* The saved in-bucket index only applies to the first bucket. */
                if (h > s_h)
                        s_idx = 0;
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
                        if (idx < s_idx || !net_eq(dev_net(n->dev), net))
                                goto next;
                        if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
                            neigh_master_filtered(n->dev, filter->master_idx))
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            flags) < 0) {
                                /* idx is not advanced, so this entry is retried. */
                                rc = -1;
                                goto out;
                        }
next:
                        idx++;
                }
        }
        rc = skb->len;
out:
        rcu_read_unlock_bh();
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}
2573
2574 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2575                              struct netlink_callback *cb,
2576                              struct neigh_dump_filter *filter)
2577 {
2578         struct pneigh_entry *n;
2579         struct net *net = sock_net(skb->sk);
2580         int rc, h, s_h = cb->args[3];
2581         int idx, s_idx = idx = cb->args[4];
2582         unsigned int flags = NLM_F_MULTI;
2583
2584         if (filter->dev_idx || filter->master_idx)
2585                 flags |= NLM_F_DUMP_FILTERED;
2586
2587         read_lock_bh(&tbl->lock);
2588
2589         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2590                 if (h > s_h)
2591                         s_idx = 0;
2592                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2593                         if (idx < s_idx || pneigh_net(n) != net)
2594                                 goto next;
2595                         if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2596                             neigh_master_filtered(n->dev, filter->master_idx))
2597                                 goto next;
2598                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2599                                             cb->nlh->nlmsg_seq,
2600                                             RTM_NEWNEIGH, flags, tbl) < 0) {
2601                                 read_unlock_bh(&tbl->lock);
2602                                 rc = -1;
2603                                 goto out;
2604                         }
2605                 next:
2606                         idx++;
2607                 }
2608         }
2609
2610         read_unlock_bh(&tbl->lock);
2611         rc = skb->len;
2612 out:
2613         cb->args[3] = h;
2614         cb->args[4] = idx;
2615         return rc;
2616
2617 }
2618
2619 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2620                                 bool strict_check,
2621                                 struct neigh_dump_filter *filter,
2622                                 struct netlink_ext_ack *extack)
2623 {
2624         struct nlattr *tb[NDA_MAX + 1];
2625         int err, i;
2626
2627         if (strict_check) {
2628                 struct ndmsg *ndm;
2629
2630                 if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2631                         NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2632                         return -EINVAL;
2633                 }
2634
2635                 ndm = nlmsg_data(nlh);
2636                 if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2637                     ndm->ndm_state || ndm->ndm_type) {
2638                         NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2639                         return -EINVAL;
2640                 }
2641
2642                 if (ndm->ndm_flags & ~NTF_PROXY) {
2643                         NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2644                         return -EINVAL;
2645                 }
2646
2647                 err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2648                                          nda_policy, extack);
2649         } else {
2650                 err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2651                                   nda_policy, extack);
2652         }
2653         if (err < 0)
2654                 return err;
2655
2656         for (i = 0; i <= NDA_MAX; ++i) {
2657                 if (!tb[i])
2658                         continue;
2659
2660                 /* all new attributes should require strict_check */
2661                 switch (i) {
2662                 case NDA_IFINDEX:
2663                         filter->dev_idx = nla_get_u32(tb[i]);
2664                         break;
2665                 case NDA_MASTER:
2666                         filter->master_idx = nla_get_u32(tb[i]);
2667                         break;
2668                 default:
2669                         if (strict_check) {
2670                                 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2671                                 return -EINVAL;
2672                         }
2673                 }
2674         }
2675
2676         return 0;
2677 }
2678
2679 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2680 {
2681         const struct nlmsghdr *nlh = cb->nlh;
2682         struct neigh_dump_filter filter = {};
2683         struct neigh_table *tbl;
2684         int t, family, s_t;
2685         int proxy = 0;
2686         int err;
2687
2688         family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2689
2690         /* check for full ndmsg structure presence, family member is
2691          * the same for both structures
2692          */
2693         if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2694             ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2695                 proxy = 1;
2696
2697         err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2698         if (err < 0 && cb->strict_check)
2699                 return err;
2700
2701         s_t = cb->args[0];
2702
2703         for (t = 0; t < NEIGH_NR_TABLES; t++) {
2704                 tbl = neigh_tables[t];
2705
2706                 if (!tbl)
2707                         continue;
2708                 if (t < s_t || (family && tbl->family != family))
2709                         continue;
2710                 if (t > s_t)
2711                         memset(&cb->args[1], 0, sizeof(cb->args) -
2712                                                 sizeof(cb->args[0]));
2713                 if (proxy)
2714                         err = pneigh_dump_table(tbl, skb, cb, &filter);
2715                 else
2716                         err = neigh_dump_table(tbl, skb, cb, &filter);
2717                 if (err < 0)
2718                         break;
2719         }
2720
2721         cb->args[0] = t;
2722         return skb->len;
2723 }
2724
2725 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2726                                struct neigh_table **tbl,
2727                                void **dst, int *dev_idx, u8 *ndm_flags,
2728                                struct netlink_ext_ack *extack)
2729 {
2730         struct nlattr *tb[NDA_MAX + 1];
2731         struct ndmsg *ndm;
2732         int err, i;
2733
2734         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2735                 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2736                 return -EINVAL;
2737         }
2738
2739         ndm = nlmsg_data(nlh);
2740         if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
2741             ndm->ndm_type) {
2742                 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2743                 return -EINVAL;
2744         }
2745
2746         if (ndm->ndm_flags & ~NTF_PROXY) {
2747                 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2748                 return -EINVAL;
2749         }
2750
2751         err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2752                                  nda_policy, extack);
2753         if (err < 0)
2754                 return err;
2755
2756         *ndm_flags = ndm->ndm_flags;
2757         *dev_idx = ndm->ndm_ifindex;
2758         *tbl = neigh_find_table(ndm->ndm_family);
2759         if (*tbl == NULL) {
2760                 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2761                 return -EAFNOSUPPORT;
2762         }
2763
2764         for (i = 0; i <= NDA_MAX; ++i) {
2765                 if (!tb[i])
2766                         continue;
2767
2768                 switch (i) {
2769                 case NDA_DST:
2770                         if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2771                                 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2772                                 return -EINVAL;
2773                         }
2774                         *dst = nla_data(tb[i]);
2775                         break;
2776                 default:
2777                         NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2778                         return -EINVAL;
2779                 }
2780         }
2781
2782         return 0;
2783 }
2784
2785 static inline size_t neigh_nlmsg_size(void)
2786 {
2787         return NLMSG_ALIGN(sizeof(struct ndmsg))
2788                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2789                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2790                + nla_total_size(sizeof(struct nda_cacheinfo))
2791                + nla_total_size(4)  /* NDA_PROBES */
2792                + nla_total_size(1); /* NDA_PROTOCOL */
2793 }
2794
2795 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2796                            u32 pid, u32 seq)
2797 {
2798         struct sk_buff *skb;
2799         int err = 0;
2800
2801         skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2802         if (!skb)
2803                 return -ENOBUFS;
2804
2805         err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2806         if (err) {
2807                 kfree_skb(skb);
2808                 goto errout;
2809         }
2810
2811         err = rtnl_unicast(skb, net, pid);
2812 errout:
2813         return err;
2814 }
2815
2816 static inline size_t pneigh_nlmsg_size(void)
2817 {
2818         return NLMSG_ALIGN(sizeof(struct ndmsg))
2819                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2820                + nla_total_size(1); /* NDA_PROTOCOL */
2821 }
2822
2823 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2824                             u32 pid, u32 seq, struct neigh_table *tbl)
2825 {
2826         struct sk_buff *skb;
2827         int err = 0;
2828
2829         skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2830         if (!skb)
2831                 return -ENOBUFS;
2832
2833         err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2834         if (err) {
2835                 kfree_skb(skb);
2836                 goto errout;
2837         }
2838
2839         err = rtnl_unicast(skb, net, pid);
2840 errout:
2841         return err;
2842 }
2843
2844 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2845                      struct netlink_ext_ack *extack)
2846 {
2847         struct net *net = sock_net(in_skb->sk);
2848         struct net_device *dev = NULL;
2849         struct neigh_table *tbl = NULL;
2850         struct neighbour *neigh;
2851         void *dst = NULL;
2852         u8 ndm_flags = 0;
2853         int dev_idx = 0;
2854         int err;
2855
2856         err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2857                                   extack);
2858         if (err < 0)
2859                 return err;
2860
2861         if (dev_idx) {
2862                 dev = __dev_get_by_index(net, dev_idx);
2863                 if (!dev) {
2864                         NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2865                         return -ENODEV;
2866                 }
2867         }
2868
2869         if (!dst) {
2870                 NL_SET_ERR_MSG(extack, "Network address not specified");
2871                 return -EINVAL;
2872         }
2873
2874         if (ndm_flags & NTF_PROXY) {
2875                 struct pneigh_entry *pn;
2876
2877                 pn = pneigh_lookup(tbl, net, dst, dev, 0);
2878                 if (!pn) {
2879                         NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
2880                         return -ENOENT;
2881                 }
2882                 return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
2883                                         nlh->nlmsg_seq, tbl);
2884         }
2885
2886         if (!dev) {
2887                 NL_SET_ERR_MSG(extack, "No device specified");
2888                 return -EINVAL;
2889         }
2890
2891         neigh = neigh_lookup(tbl, dst, dev);
2892         if (!neigh) {
2893                 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
2894                 return -ENOENT;
2895         }
2896
2897         err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
2898                               nlh->nlmsg_seq);
2899
2900         neigh_release(neigh);
2901
2902         return err;
2903 }
2904
/*
 * Call @cb(entry, @cookie) for every neighbour entry in @tbl.
 *
 * The walk runs inside rcu_read_lock_bh() with tbl->lock additionally held
 * for reading, so @cb is invoked with both held.
 */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
        int chain;
        struct neigh_hash_table *nht;

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        read_lock(&tbl->lock); /* avoid resizes */
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;

                for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
                     n != NULL;
                     n = rcu_dereference_bh(n->next))
                        cb(n, cookie);
        }
        read_unlock(&tbl->lock);
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2926
/*
 * Walk every entry in @tbl and call @cb on it with the entry's own lock
 * held for writing.  When @cb returns non-zero the entry is unlinked from
 * its hash chain, marked dead and, once its lock is dropped, passed to
 * neigh_cleanup_and_release().
 *
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                /* np always points at the link that leads to the current entry. */
                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                /* Unlink: np stays put and now leads to n's successor. */
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                neigh_mark_dead(n);
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        /* The release runs only after n->lock has been dropped. */
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2961
/*
 * Transmit @skb on @dev towards next hop @addr.
 *
 * For a table index below NEIGH_NR_TABLES the neighbour entry for @addr is
 * looked up (and created if missing) inside rcu_read_lock_bh(), and its
 * output method is called.  For NEIGH_LINK_TABLE, @addr is passed straight
 * to dev_hard_header() as the destination and the skb is queued with
 * dev_queue_xmit().
 *
 * Returns the result of the output/xmit call, or a negative errno.  The
 * skb is freed here when neighbour creation or header construction fails.
 *
 * NOTE(review): when the selected table slot is NULL, or @index matches
 * neither branch, -EAFNOSUPPORT is returned WITHOUT freeing @skb, unlike
 * the other error paths - confirm that callers expect this.
 */
int neigh_xmit(int index, struct net_device *dev,
               const void *addr, struct sk_buff *skb)
{
        int err = -EAFNOSUPPORT;
        if (likely(index < NEIGH_NR_TABLES)) {
                struct neigh_table *tbl;
                struct neighbour *neigh;

                tbl = neigh_tables[index];
                if (!tbl)
                        goto out;
                rcu_read_lock_bh();
                neigh = __neigh_lookup_noref(tbl, addr, dev);
                if (!neigh)
                        neigh = __neigh_create(tbl, addr, dev, false);
                /* Only meaningful when IS_ERR(); overwritten below otherwise. */
                err = PTR_ERR(neigh);
                if (IS_ERR(neigh)) {
                        rcu_read_unlock_bh();
                        goto out_kfree_skb;
                }
                err = neigh->output(neigh, skb);
                rcu_read_unlock_bh();
        }
        else if (index == NEIGH_LINK_TABLE) {
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      addr, NULL, skb->len);
                if (err < 0)
                        goto out_kfree_skb;
                err = dev_queue_xmit(skb);
        }
out:
        return err;
out_kfree_skb:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_xmit);
2999
3000 #ifdef CONFIG_PROC_FS
3001
/*
 * Find the first neighbour entry to show, scanning from hash bucket 0.
 *
 * An entry qualifies when it belongs to the seq_file's network namespace,
 * the optional state->neigh_sub_iter callback returns non-NULL for it, and
 * - if NEIGH_SEQ_SKIP_NOARP is set - its nud_state has some bit set other
 * than NUD_NOARP.
 *
 * Clears NEIGH_SEQ_IS_PNEIGH in state->flags and stores the bucket reached
 * in state->bucket.  Returns the entry, or NULL if none qualifies.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;
        struct neighbour *n = NULL;
        /* This initial value is overwritten by the loop below. */
        int bucket = state->bucket;

        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
                n = rcu_dereference_bh(nht->hash_buckets[bucket]);

                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                /* Throw-away position handed to the sub-iterator. */
                                loff_t fakep = 0;
                                void *v;

                                v = state->neigh_sub_iter(state, n, &fakep);
                                if (!v)
                                        goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                /* Stop at the first bucket that yielded a match. */
                if (n)
                        break;
        }
        state->bucket = bucket;

        return n;
}
3040
/*
 * Advance from entry @n to the next neighbour entry to show.
 *
 * If a sub-iterator is installed it is first asked about @n itself; a
 * non-NULL answer means @n is returned again.  Otherwise the rest of the
 * current chain and then the following buckets (tracked in state->bucket)
 * are scanned using the same namespace, sub-iterator and NOARP criteria as
 * neigh_get_first().
 *
 * @pos may be NULL.  When it is not, it is decremented for an entry found
 * by the chain scan (not on the sub-iterator early returns) and is also
 * passed through to the sub-iterator.
 *
 * Returns the next entry, or NULL when the table is exhausted.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        if (state->neigh_sub_iter) {
                void *v = state->neigh_sub_iter(state, n, pos);
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                /* Chain exhausted: move to the next bucket, if any. */
                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
3088
3089 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3090 {
3091         struct neighbour *n = neigh_get_first(seq);
3092
3093         if (n) {
3094                 --(*pos);
3095                 while (*pos) {
3096                         n = neigh_get_next(seq, n, pos);
3097                         if (!n)
3098                                 break;
3099                 }
3100         }
3101         return *pos ? NULL : n;
3102 }
3103
3104 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3105 {
3106         struct neigh_seq_state *state = seq->private;
3107         struct net *net = seq_file_net(seq);
3108         struct neigh_table *tbl = state->tbl;
3109         struct pneigh_entry *pn = NULL;
3110         int bucket = state->bucket;
3111
3112         state->flags |= NEIGH_SEQ_IS_PNEIGH;
3113         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3114                 pn = tbl->phash_buckets[bucket];
3115                 while (pn && !net_eq(pneigh_net(pn), net))
3116                         pn = pn->next;
3117                 if (pn)
3118                         break;
3119         }
3120         state->bucket = bucket;
3121
3122         return pn;
3123 }
3124
3125 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3126                                             struct pneigh_entry *pn,
3127                                             loff_t *pos)
3128 {
3129         struct neigh_seq_state *state = seq->private;
3130         struct net *net = seq_file_net(seq);
3131         struct neigh_table *tbl = state->tbl;
3132
3133         do {
3134                 pn = pn->next;
3135         } while (pn && !net_eq(pneigh_net(pn), net));
3136
3137         while (!pn) {
3138                 if (++state->bucket > PNEIGH_HASHMASK)
3139                         break;
3140                 pn = tbl->phash_buckets[state->bucket];
3141                 while (pn && !net_eq(pneigh_net(pn), net))
3142                         pn = pn->next;
3143                 if (pn)
3144                         break;
3145         }
3146
3147         if (pn && pos)
3148                 --(*pos);
3149
3150         return pn;
3151 }
3152
3153 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3154 {
3155         struct pneigh_entry *pn = pneigh_get_first(seq);
3156
3157         if (pn) {
3158                 --(*pos);
3159                 while (*pos) {
3160                         pn = pneigh_get_next(seq, pn, pos);
3161                         if (!pn)
3162                                 break;
3163                 }
3164         }
3165         return *pos ? NULL : pn;
3166 }
3167
3168 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3169 {
3170         struct neigh_seq_state *state = seq->private;
3171         void *rc;
3172         loff_t idxpos = *pos;
3173
3174         rc = neigh_get_idx(seq, &idxpos);
3175         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3176                 rc = pneigh_get_idx(seq, &idxpos);
3177
3178         return rc;
3179 }
3180
/*
 * seq_file start helper for iterating the neighbour table @tbl.
 *
 * Resets the iterator state kept in seq->private (table, bucket, flags -
 * with NEIGH_SEQ_IS_PNEIGH always cleared from @neigh_seq_flags), takes
 * rcu_read_lock_bh() and samples the current hash table.  The RCU-bh read
 * lock is still held on return; neigh_seq_stop() drops it.
 *
 * Returns SEQ_START_TOKEN for position 0, otherwise the entry at *@pos or
 * NULL when the position is past the end.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
        __acquires(rcu_bh)
{
        struct neigh_seq_state *state = seq->private;

        state->tbl = tbl;
        state->bucket = 0;
        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

        rcu_read_lock_bh();
        state->nht = rcu_dereference_bh(tbl->nht);

        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
3196
3197 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3198 {
3199         struct neigh_seq_state *state;
3200         void *rc;
3201
3202         if (v == SEQ_START_TOKEN) {
3203                 rc = neigh_get_first(seq);
3204                 goto out;
3205         }
3206
3207         state = seq->private;
3208         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3209                 rc = neigh_get_next(seq, v, NULL);
3210                 if (rc)
3211                         goto out;
3212                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3213                         rc = pneigh_get_first(seq);
3214         } else {
3215                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3216                 rc = pneigh_get_next(seq, v, NULL);
3217         }
3218 out:
3219         ++(*pos);
3220         return rc;
3221 }
3222 EXPORT_SYMBOL(neigh_seq_next);
3223
/*
 * seq_file stop helper: drops the RCU-bh read lock taken by
 * neigh_seq_start().  @seq and @v are unused.
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(rcu_bh)
{
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
3230
3231 /* statistics via seq_file */
3232
3233 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3234 {
3235         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3236         int cpu;
3237
3238         if (*pos == 0)
3239                 return SEQ_START_TOKEN;
3240
3241         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3242                 if (!cpu_possible(cpu))
3243                         continue;
3244                 *pos = cpu+1;
3245                 return per_cpu_ptr(tbl->stats, cpu);
3246         }
3247         return NULL;
3248 }
3249
3250 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3251 {
3252         struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
3253         int cpu;
3254
3255         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3256                 if (!cpu_possible(cpu))
3257                         continue;
3258                 *pos = cpu+1;
3259                 return per_cpu_ptr(tbl->stats, cpu);
3260         }
3261         return NULL;
3262 }
3263
/*
 * seq_file stop for the statistics file.  Intentionally empty:
 * neigh_stat_seq_start() takes no lock or reference to undo.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
3268
/*
 * seq_file show for the statistics file.
 *
 * For SEQ_START_TOKEN prints the column header line.  Otherwise @v is one
 * CPU's struct neigh_statistics and a row of zero-padded hex counters is
 * printed.  The first column (entries) is read from the table itself, not
 * from the per-CPU block, so it is the same on every row.  Always returns 0.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
        struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
        struct neigh_statistics *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }

        /* Argument order below must match the header columns above. */
        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
                        "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
                   atomic_read(&tbl->entries),

                   st->allocs,
                   st->destroys,
                   st->hash_grows,

                   st->lookups,
                   st->hits,

                   st->res_failed,

                   st->rcv_probes_mcast,
                   st->rcv_probes_ucast,

                   st->periodic_gc_runs,
                   st->forced_gc_runs,
                   st->unres_discards,
                   st->table_fulls
                   );

        return 0;
}
3303
3304 static const struct seq_operations neigh_stat_seq_ops = {
3305         .start  = neigh_stat_seq_start,
3306         .next   = neigh_stat_seq_next,
3307         .stop   = neigh_stat_seq_stop,
3308         .show   = neigh_stat_seq_show,
3309 };
3310 #endif /* CONFIG_PROC_FS */
3311
3312 static void __neigh_notify(struct neighbour *n, int type, int flags,
3313                            u32 pid)
3314 {
3315         struct net *net = dev_net(n->dev);
3316         struct sk_buff *skb;
3317         int err = -ENOBUFS;
3318
3319         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3320         if (skb == NULL)
3321                 goto errout;
3322
3323         err = neigh_fill_info(skb, n, pid, 0, type, flags);
3324         if (err < 0) {
3325                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3326                 WARN_ON(err == -EMSGSIZE);
3327                 kfree_skb(skb);
3328                 goto errout;
3329         }
3330         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3331         return;
3332 errout:
3333         if (err < 0)
3334                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3335 }
3336
3337 void neigh_app_ns(struct neighbour *n)
3338 {
3339         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3340 }
3341 EXPORT_SYMBOL(neigh_app_ns);
3342
3343 #ifdef CONFIG_SYSCTL
3344 static int zero;
3345 static int int_max = INT_MAX;
3346 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3347
3348 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3349                            void __user *buffer, size_t *lenp, loff_t *ppos)
3350 {
3351         int size, ret;
3352         struct ctl_table tmp = *ctl;
3353
3354         tmp.extra1 = &zero;
3355         tmp.extra2 = &unres_qlen_max;
3356         tmp.data = &size;
3357
3358         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3359         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3360
3361         if (write && !ret)
3362                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3363         return ret;
3364 }
3365
3366 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3367                                                    int family)
3368 {
3369         switch (family) {
3370         case AF_INET:
3371                 return __in_dev_arp_parms_get_rcu(dev);
3372         case AF_INET6:
3373                 return __in6_dev_nd_parms_get_rcu(dev);
3374         }
3375         return NULL;
3376 }
3377
3378 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3379                                   int index)
3380 {
3381         struct net_device *dev;
3382         int family = neigh_parms_family(p);
3383
3384         rcu_read_lock();
3385         for_each_netdev_rcu(net, dev) {
3386                 struct neigh_parms *dst_p =
3387                                 neigh_get_dev_parms_rcu(dev, family);
3388
3389                 if (dst_p && !test_bit(index, dst_p->data_state))
3390                         dst_p->data[index] = p->data[index];
3391         }
3392         rcu_read_unlock();
3393 }
3394
3395 static void neigh_proc_update(struct ctl_table *ctl, int write)
3396 {
3397         struct net_device *dev = ctl->extra1;
3398         struct neigh_parms *p = ctl->extra2;
3399         struct net *net = neigh_parms_net(p);
3400         int index = (int *) ctl->data - p->data;
3401
3402         if (!write)
3403                 return;
3404
3405         set_bit(index, p->data_state);
3406         if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3407                 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3408         if (!dev) /* NULL dev means this is default value */
3409                 neigh_copy_dflt_parms(net, p, index);
3410 }
3411
3412 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3413                                            void __user *buffer,
3414                                            size_t *lenp, loff_t *ppos)
3415 {
3416         struct ctl_table tmp = *ctl;
3417         int ret;
3418
3419         tmp.extra1 = &zero;
3420         tmp.extra2 = &int_max;
3421
3422         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3423         neigh_proc_update(ctl, write);
3424         return ret;
3425 }
3426
3427 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3428                         void __user *buffer, size_t *lenp, loff_t *ppos)
3429 {
3430         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3431
3432         neigh_proc_update(ctl, write);
3433         return ret;
3434 }
3435 EXPORT_SYMBOL(neigh_proc_dointvec);
3436
3437 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3438                                 void __user *buffer,
3439                                 size_t *lenp, loff_t *ppos)
3440 {
3441         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3442
3443         neigh_proc_update(ctl, write);
3444         return ret;
3445 }
3446 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3447
3448 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3449                                               void __user *buffer,
3450                                               size_t *lenp, loff_t *ppos)
3451 {
3452         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3453
3454         neigh_proc_update(ctl, write);
3455         return ret;
3456 }
3457
3458 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3459                                    void __user *buffer,
3460                                    size_t *lenp, loff_t *ppos)
3461 {
3462         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3463
3464         neigh_proc_update(ctl, write);
3465         return ret;
3466 }
3467 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3468
3469 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3470                                           void __user *buffer,
3471                                           size_t *lenp, loff_t *ppos)
3472 {
3473         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3474
3475         neigh_proc_update(ctl, write);
3476         return ret;
3477 }
3478
3479 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3480                                           void __user *buffer,
3481                                           size_t *lenp, loff_t *ppos)
3482 {
3483         struct neigh_parms *p = ctl->extra2;
3484         int ret;
3485
3486         if (strcmp(ctl->procname, "base_reachable_time") == 0)
3487                 ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3488         else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3489                 ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3490         else
3491                 ret = -1;
3492
3493         if (write && ret == 0) {
3494                 /* update reachable_time as well, otherwise, the change will
3495                  * only be effective after the next time neigh_periodic_work
3496                  * decides to recompute it
3497                  */
3498                 p->reachable_time =
3499                         neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3500         }
3501         return ret;
3502 }
3503
/*
 * Address of data[idx] inside a struct neigh_parms located at address 0,
 * i.e. the member's offset expressed as a pointer.  neigh_sysctl_register()
 * adds the real parameter block address to it.
 */
#define NEIGH_PARMS_DATA_OFFSET(idx) \
	(&((struct neigh_parms *) 0)->data[idx])

/*
 * Template entry at slot NEIGH_VAR_<attr> whose value lives in
 * data[NEIGH_VAR_<data_attr>]; the two differ for aliases that present
 * an existing value in another unit.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, pname, perm, handler) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= pname, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
	}

/* Integer knob limited to [0, INT_MAX] */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, pname) \
	NEIGH_SYSCTL_ENTRY(attr, attr, pname, 0644, neigh_proc_dointvec_zero_intmax)

/* Knob handled by neigh_proc_dointvec_jiffies */
#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, pname) \
	NEIGH_SYSCTL_ENTRY(attr, attr, pname, 0644, neigh_proc_dointvec_jiffies)

/* Knob handled by neigh_proc_dointvec_userhz_jiffies */
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, pname) \
	NEIGH_SYSCTL_ENTRY(attr, attr, pname, 0644, neigh_proc_dointvec_userhz_jiffies)

/* Knob handled by neigh_proc_dointvec_ms_jiffies */
#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, pname) \
	NEIGH_SYSCTL_ENTRY(attr, attr, pname, 0644, neigh_proc_dointvec_ms_jiffies)

/* ms-jiffies alias: slot <attr> backed by the storage of <data_attr> */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, pname) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, pname, 0644, neigh_proc_dointvec_ms_jiffies)

/* Packet-count alias: slot <attr> backed by the storage of <data_attr> */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, pname) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, pname, 0644, neigh_proc_dointvec_unres_qlen)
3533
3534 static struct neigh_sysctl_table {
3535         struct ctl_table_header *sysctl_header;
3536         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3537 } neigh_sysctl_template __read_mostly = {
3538         .neigh_vars = {
3539                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3540                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3541                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3542                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3543                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3544                 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3545                 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3546                 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3547                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3548                 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3549                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3550                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3551                 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3552                 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3553                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3554                 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3555                 [NEIGH_VAR_GC_INTERVAL] = {
3556                         .procname       = "gc_interval",
3557                         .maxlen         = sizeof(int),
3558                         .mode           = 0644,
3559                         .proc_handler   = proc_dointvec_jiffies,
3560                 },
3561                 [NEIGH_VAR_GC_THRESH1] = {
3562                         .procname       = "gc_thresh1",
3563                         .maxlen         = sizeof(int),
3564                         .mode           = 0644,
3565                         .extra1         = &zero,
3566                         .extra2         = &int_max,
3567                         .proc_handler   = proc_dointvec_minmax,
3568                 },
3569                 [NEIGH_VAR_GC_THRESH2] = {
3570                         .procname       = "gc_thresh2",
3571                         .maxlen         = sizeof(int),
3572                         .mode           = 0644,
3573                         .extra1         = &zero,
3574                         .extra2         = &int_max,
3575                         .proc_handler   = proc_dointvec_minmax,
3576                 },
3577                 [NEIGH_VAR_GC_THRESH3] = {
3578                         .procname       = "gc_thresh3",
3579                         .maxlen         = sizeof(int),
3580                         .mode           = 0644,
3581                         .extra1         = &zero,
3582                         .extra2         = &int_max,
3583                         .proc_handler   = proc_dointvec_minmax,
3584                 },
3585                 {},
3586         },
3587 };
3588
3589 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3590                           proc_handler *handler)
3591 {
3592         int i;
3593         struct neigh_sysctl_table *t;
3594         const char *dev_name_source;
3595         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3596         char *p_name;
3597
3598         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3599         if (!t)
3600                 goto err;
3601
3602         for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3603                 t->neigh_vars[i].data += (long) p;
3604                 t->neigh_vars[i].extra1 = dev;
3605                 t->neigh_vars[i].extra2 = p;
3606         }
3607
3608         if (dev) {
3609                 dev_name_source = dev->name;
3610                 /* Terminate the table early */
3611                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3612                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3613         } else {
3614                 struct neigh_table *tbl = p->tbl;
3615                 dev_name_source = "default";
3616                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3617                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3618                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3619                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3620         }
3621
3622         if (handler) {
3623                 /* RetransTime */
3624                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3625                 /* ReachableTime */
3626                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3627                 /* RetransTime (in milliseconds)*/
3628                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3629                 /* ReachableTime (in milliseconds) */
3630                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3631         } else {
3632                 /* Those handlers will update p->reachable_time after
3633                  * base_reachable_time(_ms) is set to ensure the new timer starts being
3634                  * applied after the next neighbour update instead of waiting for
3635                  * neigh_periodic_work to update its value (can be multiple minutes)
3636                  * So any handler that replaces them should do this as well
3637                  */
3638                 /* ReachableTime */
3639                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3640                         neigh_proc_base_reachable_time;
3641                 /* ReachableTime (in milliseconds) */
3642                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3643                         neigh_proc_base_reachable_time;
3644         }
3645
3646         /* Don't export sysctls to unprivileged users */
3647         if (neigh_parms_net(p)->user_ns != &init_user_ns)
3648                 t->neigh_vars[0].procname = NULL;
3649
3650         switch (neigh_parms_family(p)) {
3651         case AF_INET:
3652               p_name = "ipv4";
3653               break;
3654         case AF_INET6:
3655               p_name = "ipv6";
3656               break;
3657         default:
3658               BUG();
3659         }
3660
3661         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3662                 p_name, dev_name_source);
3663         t->sysctl_header =
3664                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3665         if (!t->sysctl_header)
3666                 goto free;
3667
3668         p->sysctl_table = t;
3669         return 0;
3670
3671 free:
3672         kfree(t);
3673 err:
3674         return -ENOBUFS;
3675 }
3676 EXPORT_SYMBOL(neigh_sysctl_register);
3677
3678 void neigh_sysctl_unregister(struct neigh_parms *p)
3679 {
3680         if (p->sysctl_table) {
3681                 struct neigh_sysctl_table *t = p->sysctl_table;
3682                 p->sysctl_table = NULL;
3683                 unregister_net_sysctl_table(t->sysctl_header);
3684                 kfree(t);
3685         }
3686 }
3687 EXPORT_SYMBOL(neigh_sysctl_unregister);
3688
3689 #endif  /* CONFIG_SYSCTL */
3690
3691 static int __init neigh_init(void)
3692 {
3693         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3694         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3695         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
3696
3697         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3698                       0);
3699         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3700
3701         return 0;
3702 }
3703
3704 subsys_initcall(neigh_init);