/*
 * Source: linux-imx.git — net/core/neighbour.c
 * (mirrored via rtime.felk.cvut.cz gitweb; merge "fixes-for-linus" from rusty's tree)
 */
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
42 #define DEBUG
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...)              \
45 do {                                            \
46         if (level <= NEIGH_DEBUG)               \
47                 pr_debug(fmt, ##__VA_ARGS__);   \
48 } while (0)
49
50 #define PNEIGH_HASHMASK         0xF
51
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61
62 /*
63    Neighbour hash table buckets are protected with rwlock tbl->lock.
64
65    - All the scans/updates to hash buckets MUST be made under this lock.
66    - NOTHING clever should be made under this lock: no callbacks
67      to protocol backends, no attempts to send something to network.
68      It will result in deadlocks, if backend/driver wants to use neighbour
69      cache.
70    - If the entry requires some non-trivial actions, increase
71      its reference count and release table lock.
72
73    Neighbour entries are protected:
74    - with reference count.
75    - with rwlock neigh->lock
76
77    Reference count prevents destruction.
78
79    neigh->lock mainly serializes ll address data and its validity state.
80    However, the same lock is used to protect another entry fields:
81     - timer
82     - resolution queue
83
84    Again, nothing clever shall be made under neigh->lock,
85    the most complicated procedure, which we allow is dev->hard_header.
86    It is supposed, that dev->hard_header is simplistic and does
87    not make callbacks to neighbour tables.
88
89    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
90    list of neighbour tables. This list is used only in process context,
91  */
92
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94
/* Sink output handler: installed on dead or unusable entries so that any
 * packet handed to them is dropped instead of transmitted.
 * Always consumes @skb and reports -ENETDOWN to the caller.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
100
/* Final teardown for an entry that has been unlinked from the hash table:
 * run the per-parms cleanup hook (if any), broadcast RTM_DELNEIGH over
 * netlink, then drop the table's reference.  The notify must happen
 * before the release, while the entry is still guaranteed alive.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
109
110 /*
111  * It is random distribution in the interval (1/2)*base...(3/2)*base.
112  * It corresponds to default IPv6 settings and is not overridable,
113  * because it is really reasonable choice.
114  */
115
/* Pick a randomized reachability timeout in [base/2, 3*base/2).
 * A zero base yields zero (randomization disabled).
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
/* Synchronously reclaim entries under memory pressure.
 *
 * Walks every hash bucket under tbl->lock and unlinks entries that are
 * both unreferenced (refcnt == 1, i.e. only the table's own reference)
 * and not NUD_PERMANENT.  Updates tbl->last_flush and returns 1 if at
 * least one entry was reclaimed, 0 otherwise.  Called from
 * neigh_alloc() when the table crosses its gc thresholds.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink via rcu_assign_pointer so concurrent
				 * RCU readers see either the old or the new
				 * chain, never a torn pointer. */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;	/* keep this entry; advance */
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
168
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
170 {
171         neigh_hold(n);
172         if (unlikely(mod_timer(&n->timer, when))) {
173                 printk("NEIGH: BUG, double timer add, state is %x\n",
174                        n->nud_state);
175                 dump_stack();
176         }
177 }
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
/* Unhash every entry belonging to @dev (all entries when @dev is NULL).
 * Caller must hold tbl->lock for writing with BHs disabled.
 *
 * Entries that are still referenced elsewhere cannot be freed yet; they
 * are unlinked, their timers stopped, output redirected to
 * neigh_blackhole and state forced to NOARP/NONE so they stay harmless
 * until the last reference goes away.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			/* RCU-safe unlink before mutating the entry. */
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
248
/* Flush all of @dev's entries from @tbl, e.g. after a hardware address
 * change makes the cached link-layer data stale.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
256
/* Handle a device going down: flush its neighbour and proxy entries,
 * then stop the proxy timer and drain the proxy queue (outside the
 * table lock, since del_timer_sync() may sleep-wait on the handler).
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
269
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 *
 * If the table is over gc_thresh3, or over gc_thresh2 with no flush in
 * the last 5 seconds, a forced GC runs first; allocation fails (NULL)
 * if the table is still over gc_thresh3 afterwards.  The new entry
 * starts with refcnt 1, state NUD_NONE, output = neigh_blackhole and
 * dead = 1 — it is not yet hashed; __neigh_create() clears `dead' once
 * the entry becomes visible.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* Count ourselves in up front; backed out via out_entries on failure. */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/* entry_size covers the protocol's key tail; neigh_priv_len is
	 * extra room the device requested. */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;	/* not hashed yet */
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}
310
/* Fill *x with a random hash seed.  The low bit is forced on so the
 * seed is never zero and always odd — presumably so multiplication-based
 * per-protocol hash functions keep mixing well; confirm against the
 * tbl->hash implementations.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
316
/* Allocate a zeroed bucket array of 2^shift entries plus its descriptor,
 * and seed the hash randomizers.  Small arrays come from kzalloc; larger
 * ones fall back to whole pages.  Returns NULL on allocation failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
343
/* RCU callback freeing a retired hash table once no readers remain.
 * Mirrors the allocation strategy of neigh_hash_alloc(): kfree for
 * sub-page bucket arrays, free_pages otherwise.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
358
/* Replace tbl's hash table with one of 2^new_shift buckets, rehashing
 * every entry.  Caller must hold tbl->lock for writing.  On allocation
 * failure the old table is returned unchanged.  The old table is
 * published-out via rcu_assign_pointer and freed after a grace period,
 * so lockless readers are never left with a dangling table.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Recompute with the NEW table's random seeds. */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket. */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
399
/* Look up the entry for (@pkey, @dev) in @tbl.
 *
 * Lockless: walks the bucket chain under rcu_read_lock_bh().  On a hit,
 * takes a reference with atomic_inc_not_zero(); if the entry's refcount
 * already dropped to zero (it is being destroyed) NULL is returned even
 * though the key matched.  Caller owns the returned reference.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;	/* raced with destruction */
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
429
/* Like neigh_lookup(), but matches on key and network namespace only,
 * ignoring the device (the key is hashed with dev == NULL).  Returns a
 * referenced entry or NULL; NULL is also returned when the match is
 * concurrently being destroyed (refcnt already zero).
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;	/* raced with destruction */
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
460
/* Create (or find) the entry for (@pkey, @dev) in @tbl.
 *
 * Allocates a candidate entry, runs the protocol constructor, the
 * device's ndo_neigh_construct hook and the parms neigh_setup hook, and
 * only then takes tbl->lock to insert it.  If another CPU inserted a
 * matching entry meanwhile, the existing one wins and the candidate is
 * released.  Returns the (optionally referenced, per @want_ref) entry,
 * or an ERR_PTR: -ENOBUFS on allocation failure, -EINVAL if the parms
 * died while we were setting up, or the constructor's error code.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation so the entry is immediately "stale". */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Did somebody else insert the same key while we set up? */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* Publish at the bucket head; readers see a fully built entry. */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
/* Proxy-entry lookup without taking tbl->lock itself — callers are
 * expected to serialize against pneigh chain updates (e.g. hold
 * tbl->lock; compare pneigh_lookup(), which takes it).  Returns the
 * matching entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
/* Find a proxy entry for (@pkey, @dev) in @net, optionally creating it.
 *
 * The lookup runs under tbl->lock (read).  When @creat is set and no
 * entry exists, a new one is allocated with GFP_KERNEL under the RTNL
 * (asserted), run through the protocol's pconstructor, and linked into
 * the hash under tbl->lock (write).  Returns the entry, or NULL on
 * miss (!creat), allocation failure, or pconstructor failure.
 *
 * Note the lock is dropped between lookup and insert; the RTNL is what
 * prevents two concurrent creators from racing here.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	/* sizeof(*n) + key_len: the key is stored in the flexible tail. */
	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the device and net references. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
631
632
/* Remove and free the proxy entry exactly matching (@pkey, @dev, @net).
 * Unlike lookup, a NULL-device wildcard entry does NOT match here — the
 * device pointers must be equal.  The entry is unlinked under tbl->lock,
 * but the destructor and frees run after the lock is dropped.
 * Returns 0 on success, -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
659
/* Delete every proxy entry for @dev (all entries when @dev is NULL).
 * Caller must hold tbl->lock for writing (see neigh_ifdown()).
 *
 * NOTE(review): always returns -ENOENT, even when entries were removed;
 * the only caller in this file ignores the return value.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
/* Drop one reference on @parms, destroying it when the count reaches
 * zero.
 */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
691
692 /*
693  *      neighbour must already be out of the table;
694  *
695  */
/* Final destructor, invoked when the last reference is released.
 * The entry must already be unhashed (dead != 0) — destroying a live
 * entry is a bug and only logged, leaking the entry on purpose rather
 * than corrupting the table.  Purges the arp_queue, runs the device's
 * ndo_neigh_destroy hook, drops the device and parms references, and
 * frees the entry after an RCU grace period.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
728
729 /* Neighbour state is suspicious;
730    disable fast path.
731
732    Called with write_locked neigh.
733  */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* Route output through the slow path that revalidates the entry. */
	neigh->output = neigh->ops->output;
}
740
741 /* Neighbour state is OK;
742    enable fast path.
743
744    Called with write_locked neigh.
745  */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* Entry verified reachable: switch to the fast output path. */
	neigh->output = neigh->ops->connected_output;
}
752
/* Periodic garbage collector, run from the table's delayed work.
 *
 * Skips entirely while the table is below gc_thresh1.  Every ~5 minutes
 * it also re-randomizes reachable_time for all parms.  It then sweeps
 * the buckets, reclaiming entries that are unreferenced and either
 * NUD_FAILED or idle past gc_staletime.  The table lock is dropped and
 * re-taken between buckets (with cond_resched()) to bound latency; the
 * nht pointer is re-read afterwards because the table may have grown.
 * Reschedules itself every base_reachable_time/2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			/* Permanent entries and those with a live timer
			 * are never collected here. */
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}
833
834 static __inline__ int neigh_max_probes(struct neighbour *n)
835 {
836         struct neigh_parms *p = n->parms;
837         return (n->nud_state & NUD_PROBE) ?
838                 p->ucast_probes :
839                 p->ucast_probes + p->app_probes + p->mcast_probes;
840 }
841
/* Resolution failed: report unreachability for every queued skb and
 * flush the queue.  Called and returns with neigh->lock write-held, but
 * drops it around each error_report() call, since that callback is
 * complex and may itself touch the neighbour table.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
866
/* Send one solicitation for @neigh and bump its probe counter.
 *
 * Called with neigh->lock write-held; the lock is dropped before the
 * ->solicit() callback runs and is NOT re-taken (see __releases).
 * The head-of-queue skb is copied first so solicit() gets a stable
 * packet even if arp_queue is purged concurrently.
 */
static void neigh_probe(struct neighbour *neigh)
        __releases(neigh->lock)
{
        struct sk_buff *skb = skb_peek(&neigh->arp_queue);
        /* keep skb alive even if arp_queue overflows */
        if (skb)
                skb = skb_copy(skb, GFP_ATOMIC);
        write_unlock(&neigh->lock);
        neigh->ops->solicit(neigh, skb);
        atomic_inc(&neigh->probes);
        /* solicit() took what it needed from the copy; release it. */
        kfree_skb(skb);
}
879
/* Called when a timer expires for a neighbour entry.
 *
 * Drives the NUD state machine:
 *   REACHABLE -> (still confirmed) stay, or -> DELAY / STALE;
 *   DELAY     -> (confirmed meanwhile) REACHABLE, or -> PROBE;
 *   PROBE / INCOMPLETE -> re-solicit until neigh_max_probes() is
 *   exceeded, then -> FAILED via neigh_invalidate().
 *
 * The pending timer holds a reference on @neigh; it is dropped at the
 * bottom, and a new one is taken (neigh_hold) whenever the timer is
 * successfully re-armed.
 */
static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned int state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;        /* default re-check interval */

        /* Entry left the timed states while the timer was in flight. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        neigh_dbg(2, "neigh %p is still alive\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Recently used but not confirmed: give upper
                         * layers a chance to confirm before probing.
                         */
                        neigh_dbg(2, "neigh %p is delayed\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        neigh_dbg(2, "neigh %p is suspected\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* Confirmation arrived during the delay window. */
                        neigh_dbg(2, "neigh %p is now reachable\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        neigh_dbg(2, "neigh %p is probed\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        /* Out of probes: declare the entry failed and flush its queue. */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Clamp: never re-arm closer than HZ/2 from now. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() drops neigh->lock for us. */
                neigh_probe(neigh);
        } else {
        /* NOTE: "out" sits inside the else so that every path releases
         * the lock exactly once (neigh_probe() already unlocked above).
         */
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        neigh_release(neigh);   /* reference held by the expired timer */
}
963
/* Slow path of neigh_event_send(): start (or restart) resolution for
 * @neigh and decide what to do with @skb while it is unresolved.
 *
 * Returns 0 if the caller may transmit immediately (entry is usable),
 * 1 if the skb was queued on arp_queue or dropped pending resolution.
 *
 * Locking: takes neigh->lock with BHs disabled.  The exit is
 * asymmetric on purpose: neigh_probe() releases the lock itself, so
 * only local_bh_enable() remains afterwards.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        /* Already resolved or being verified: nothing to do here. */
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        unsigned long next, now = jiffies;

                        /* Start from the unicast budget so only the
                         * mcast/app probes run for initial resolution.
                         */
                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(neigh->parms->retrans_time, HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* No probing configured at all: fail instantly. */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                neigh_dbg(2, "neigh %p is delayed\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Enforce the per-neighbour byte quota by
                         * dropping the oldest queued packets first.
                         */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               neigh->parms->queue_len_bytes) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        if (immediate_probe)
                neigh_probe(neigh);     /* drops neigh->lock */
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1029
1030 static void neigh_update_hhs(struct neighbour *neigh)
1031 {
1032         struct hh_cache *hh;
1033         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1034                 = NULL;
1035
1036         if (neigh->dev->header_ops)
1037                 update = neigh->dev->header_ops->cache_update;
1038
1039         if (update) {
1040                 hh = &neigh->hh;
1041                 if (hh->hh_len) {
1042                         write_seqlock_bh(&hh->hh_lock);
1043                         update(hh, neigh->dev, neigh->ha);
1044                         write_sequnlock_bh(&hh->hh_lock);
1045                 }
1046         }
1047 }
1048
1049
1050
1051 /* Generic update routine.
1052    -- lladdr is new lladdr or NULL, if it is not supplied.
1053    -- new    is new state.
1054    -- flags
1055         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1056                                 if it is different.
1057         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1058                                 lladdr instead of overriding it
1059                                 if it is different.
1060                                 It also allows to retain current state
1061                                 if lladdr is unchanged.
1062         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1063
1064         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1065                                 NTF_ROUTER flag.
1066         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1067                                 a router.
1068
1069    Caller MUST hold reference count on the entry.
1070  */
1071
/* Apply an administrative/protocol update to @neigh (see the comment
 * block above for the flag semantics).  Returns 0 on success or a
 * negative errno (-EPERM for disallowed updates, -EINVAL when no
 * usable lladdr is available).  Caller must hold a reference.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* NOARP/PERMANENT entries may only be changed administratively. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        /* Transition into an invalid state: tear down timers, and if we
         * were resolving, fail all queued packets.
         */
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Keep the old address but demote the
                                 * entry so it gets re-verified.
                                 */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        if (lladdr != neigh->ha) {
                /* ha_lock seqlock keeps lockless readers consistent. */
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                /* If not connected, age the confirmation far into the
                 * past so the new address is verified soon.
                 */
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        /* Entry just became valid: flush packets queued while it was
         * unresolved through the (possibly different) top-level neigh.
         */
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();

                        /* Why not just use 'neigh' as-is?  The problem is that
                         * things such as shaper, eql, and sch_teql can end up
                         * using alternative, different, neigh objects to output
                         * the packet in the output path.  So what we need to do
                         * here is re-lookup the top-level neigh in the path so
                         * we can reinject the packet there.
                         */
                        n2 = NULL;
                        if (dst) {
                                n2 = dst_neigh_lookup_skb(dst, skb);
                                if (n2)
                                        n1 = n2;
                        }
                        n1->output(n1, skb);
                        if (n2)
                                neigh_release(n2);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                __skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1232
1233 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1234                                  u8 *lladdr, void *saddr,
1235                                  struct net_device *dev)
1236 {
1237         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1238                                                  lladdr || !dev->addr_len);
1239         if (neigh)
1240                 neigh_update(neigh, lladdr, NUD_STALE,
1241                              NEIGH_UPDATE_F_OVERRIDE);
1242         return neigh;
1243 }
1244 EXPORT_SYMBOL(neigh_event_ns);
1245
1246 /* called with read_lock_bh(&n->lock); */
1247 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1248 {
1249         struct net_device *dev = dst->dev;
1250         __be16 prot = dst->ops->protocol;
1251         struct hh_cache *hh = &n->hh;
1252
1253         write_lock_bh(&n->lock);
1254
1255         /* Only one thread can come in here and initialize the
1256          * hh_cache entry.
1257          */
1258         if (!hh->hh_len)
1259                 dev->header_ops->cache(n, hh, prot);
1260
1261         write_unlock_bh(&n->lock);
1262 }
1263
1264 /* This function can be used in contexts, where only old dev_queue_xmit
1265  * worked, f.e. if you want to override normal output path (eql, shaper),
1266  * but resolution is not made yet.
1267  */
1268
1269 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1270 {
1271         struct net_device *dev = skb->dev;
1272
1273         __skb_pull(skb, skb_network_offset(skb));
1274
1275         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1276                             skb->len) < 0 &&
1277             dev->header_ops->rebuild(skb))
1278                 return 0;
1279
1280         return dev_queue_xmit(skb);
1281 }
1282 EXPORT_SYMBOL(neigh_compat_output);
1283
1284 /* Slow and careful. */
1285
/* Output path for entries that may still need resolution.  If
 * neigh_event_send() says the entry is usable, fill in the hardware
 * header (retrying under the ha_lock seqlock if the address changes
 * mid-copy) and transmit; otherwise the skb was queued or dropped by
 * the event-send path and we return 0.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        int rc = 0;

        if (!dst)
                goto discard;

        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                /* Lazily build the cached hardware header. */
                if (dev->header_ops->cache && !neigh->hh.hh_len)
                        neigh_hh_init(neigh, dst);

                do {
                        __skb_pull(skb, skb_network_offset(skb));
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = dev_queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        /* NOTE: falls through into out_kfree_skb below on purpose. */
        neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1324
1325 /* As fast as possible without hh cache */
1326
1327 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1328 {
1329         struct net_device *dev = neigh->dev;
1330         unsigned int seq;
1331         int err;
1332
1333         do {
1334                 __skb_pull(skb, skb_network_offset(skb));
1335                 seq = read_seqbegin(&neigh->ha_lock);
1336                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1337                                       neigh->ha, NULL, skb->len);
1338         } while (read_seqretry(&neigh->ha_lock, seq));
1339
1340         if (err >= 0)
1341                 err = dev_queue_xmit(skb);
1342         else {
1343                 err = -EINVAL;
1344                 kfree_skb(skb);
1345         }
1346         return err;
1347 }
1348 EXPORT_SYMBOL(neigh_connected_output);
1349
/* Output path for devices that need no link-layer resolution or header
 * (the neighbour argument is intentionally unused): transmit directly.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1355
/* Proxy timer handler: walk the table's delayed proxy queue, replay
 * every skb whose scheduled time has arrived via ->proxy_redo(), and
 * re-arm the timer for the earliest remaining entry (if any).
 */
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;    /* smallest remaining delay; 0 = none */
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        /* Due: replay it (or drop if the device went down). */
                        struct net_device *dev = skb->dev;

                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev)) {
                                rcu_read_lock();
                                tbl->proxy_redo(skb);
                                rcu_read_unlock();
                        } else {
                                kfree_skb(skb);
                        }

                        /* Drop the reference taken in pneigh_enqueue(). */
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
1389
1390 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1391                     struct sk_buff *skb)
1392 {
1393         unsigned long now = jiffies;
1394         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1395
1396         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1397                 kfree_skb(skb);
1398                 return;
1399         }
1400
1401         NEIGH_CB(skb)->sched_next = sched_next;
1402         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1403
1404         spin_lock(&tbl->proxy_queue.lock);
1405         if (del_timer(&tbl->proxy_timer)) {
1406                 if (time_before(tbl->proxy_timer.expires, sched_next))
1407                         sched_next = tbl->proxy_timer.expires;
1408         }
1409         skb_dst_drop(skb);
1410         dev_hold(skb->dev);
1411         __skb_queue_tail(&tbl->proxy_queue, skb);
1412         mod_timer(&tbl->proxy_timer, sched_next);
1413         spin_unlock(&tbl->proxy_queue.lock);
1414 }
1415 EXPORT_SYMBOL(pneigh_enqueue);
1416
1417 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1418                                                       struct net *net, int ifindex)
1419 {
1420         struct neigh_parms *p;
1421
1422         for (p = &tbl->parms; p; p = p->next) {
1423                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1424                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1425                         return p;
1426         }
1427
1428         return NULL;
1429 }
1430
/* Allocate per-device neigh_parms for @dev, cloned from the table's
 * defaults, give the device's ndo_neigh_setup() a chance to adjust
 * them, then publish the new parms on tbl->parms list.  Returns the
 * new parms (refcount 1, holding a dev and net reference) or NULL.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p;
        struct net *net = dev_net(dev);
        const struct net_device_ops *ops = dev->netdev_ops;

        p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
        if (p) {
                p->tbl            = tbl;
                atomic_set(&p->refcnt, 1);
                /* Randomise so neighbours don't all expire in lockstep. */
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);

                /* Let the driver veto or tune the parms before linking. */
                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                        kfree(p);
                        return NULL;
                }

                dev_hold(dev);
                p->dev = dev;
                write_pnet(&p->net, hold_net(net));
                p->sysctl_table = NULL;
                /* Publish on the list only once fully initialized. */
                write_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
                write_unlock_bh(&tbl->lock);
        }
        return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1462
/* RCU callback for neigh_parms_release(): drop the list's reference
 * once no reader can still be traversing the parms chain.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
        struct neigh_parms *parms =
                container_of(head, struct neigh_parms, rcu_head);

        neigh_parms_put(parms);
}
1470
/* Unlink @parms from @tbl's list and schedule the final put after an
 * RCU grace period.  The table's built-in default parms are never
 * released.  Note the early unlock-and-return inside the loop once the
 * entry is found.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        struct neigh_parms **p;

        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
                        parms->dead = 1;
                        write_unlock_bh(&tbl->lock);
                        /* Release the dev ref taken in neigh_parms_alloc(). */
                        if (parms->dev)
                                dev_put(parms->dev);
                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
        write_unlock_bh(&tbl->lock);
        neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
1493
/* Final destructor, invoked when the last parms reference is dropped:
 * release the pinned net namespace and free the structure.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        release_net(neigh_parms_net(parms));
        kfree(parms);
}
1499
1500 static struct lock_class_key neigh_table_proxy_queue_class;
1501
/* Core initialization of a neigh_table: default parms, per-cpu stats,
 * /proc entry, neighbour and proxy hash tables, gc work and proxy
 * timer.  Runs at boot/module init, so allocation failures panic.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        /* Initial neighbour hash (shift 3; grows on demand later). */
        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        /* Fixed-size proxy-neighbour hash. */
        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        if (!tbl->entry_size)
                tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
                                        tbl->key_len, NEIGH_PRIV_ALIGN);
        else
                WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

        rwlock_init(&tbl->lock);
        INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
1546
/* Initialize @tbl and register it on the global neigh_tables list.
 * Registering a second table for the same address family is a bug;
 * it is still linked in, but loudly reported with a stack dump.
 */
void neigh_table_init(struct neigh_table *tbl)
{
        struct neigh_table *tmp;

        neigh_table_init_no_netlink(tbl);
        write_lock(&neigh_tbl_lock);
        /* Scan for an existing table of the same family (tmp != NULL). */
        for (tmp = neigh_tables; tmp; tmp = tmp->next) {
                if (tmp->family == tbl->family)
                        break;
        }
        tbl->next       = neigh_tables;
        neigh_tables    = tbl;
        write_unlock(&neigh_tbl_lock);

        if (unlikely(tmp)) {
                pr_err("Registering multiple tables for family %d\n",
                       tbl->family);
                dump_stack();
        }
}
EXPORT_SYMBOL(neigh_table_init);
1568
/* Tear down @tbl on module unload: stop gc work and the proxy timer,
 * purge queued proxy skbs, drop all neighbour entries, unlink the
 * table from the global list and free its hashes/stats.  The neighbour
 * hash itself is freed via RCU since lockless readers may still hold
 * it.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        if (atomic_read(&tbl->entries))
                pr_crit("neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);

        /* Defer freeing the hash until readers are done with it. */
        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
                 neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1604
/* RTM_DELNEIGH handler: delete a (proxy) neighbour entry described by
 * the netlink message.  A plain neighbour is "deleted" by forcing it
 * to NUD_FAILED via an administrative neigh_update().  Returns 0 or a
 * negative errno (-EINVAL, -ENODEV, -ENOENT, -EAFNOSUPPORT).
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *neigh;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Matching family found: drop the list lock here; every
                 * later exit path goes straight to "out" without it.
                 */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(dst_attr) < tbl->key_len)
                        goto out;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
                if (neigh == NULL) {
                        err = -ENOENT;
                        goto out;
                }

                /* Administrative "delete": force the entry to FAILED. */
                err = neigh_update(neigh, NULL, NUD_FAILED,
                                   NEIGH_UPDATE_F_OVERRIDE |
                                   NEIGH_UPDATE_F_ADMIN);
                neigh_release(neigh);
                goto out;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;

out:
        return err;
}
1668
/* RTM_NEWNEIGH handler: create or update a neighbour entry (or, with
 * NTF_PROXY, a proxy entry) described by a netlink request.
 *
 * Runs under RTNL (asserted below).  Returns 0 on success or a
 * negative errno:
 *   -EINVAL        malformed message, missing NDA_DST, or short lladdr
 *   -ENODEV        ndm_ifindex does not resolve to a device
 *   -ENOENT        entry absent and NLM_F_CREATE not supplied
 *   -EEXIST        entry present and NLM_F_EXCL supplied
 *   -EAFNOSUPPORT  no neighbour table registered for ndm_family
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *tb[NDA_MAX+1];
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err;

        ASSERT_RTNL();
        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
        if (err < 0)
                goto out;

        /* destination address is mandatory */
        err = -EINVAL;
        if (tb[NDA_DST] == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }

                /* a supplied link-layer address must be at least as long
                 * as the device's hardware address */
                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
                        goto out;
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
                struct neighbour *neigh;
                void *dst, *lladdr;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Matching table found: drop the list lock before doing
                 * work that may allocate/sleep.  Every path below leaves
                 * the loop, so the lock is never retaken here. */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(tb[NDA_DST]) < tbl->key_len)
                        goto out;
                dst = nla_data(tb[NDA_DST]);
                lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

                if (ndm->ndm_flags & NTF_PROXY) {
                        struct pneigh_entry *pn;

                        /* creat==1: look up or create the proxy entry */
                        err = -ENOBUFS;
                        pn = pneigh_lookup(tbl, net, dst, dev, 1);
                        if (pn) {
                                pn->flags = ndm->ndm_flags;
                                err = 0;
                        }
                        goto out;
                }

                /* non-proxy entries are always bound to a device */
                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, dst, dev);
                if (neigh == NULL) {
                        if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
                                err = -ENOENT;
                                goto out;
                        }

                        neigh = __neigh_lookup_errno(tbl, dst, dev);
                        if (IS_ERR(neigh)) {
                                err = PTR_ERR(neigh);
                                goto out;
                        }
                } else {
                        if (nlh->nlmsg_flags & NLM_F_EXCL) {
                                err = -EEXIST;
                                neigh_release(neigh);
                                goto out;
                        }

                        /* only NLM_F_REPLACE may clobber an existing entry */
                        if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
                                flags &= ~NEIGH_UPDATE_F_OVERRIDE;
                }

                if (ndm->ndm_flags & NTF_USE) {
                        /* mark the entry used / kick resolution only */
                        neigh_event_send(neigh, NULL);
                        err = 0;
                } else
                        err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
                neigh_release(neigh);
                goto out;
        }

        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;
out:
        return err;
}
1766
1767 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1768 {
1769         struct nlattr *nest;
1770
1771         nest = nla_nest_start(skb, NDTA_PARMS);
1772         if (nest == NULL)
1773                 return -ENOBUFS;
1774
1775         if ((parms->dev &&
1776              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1777             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1778             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1779             /* approximative value for deprecated QUEUE_LEN (in packets) */
1780             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1781                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1782             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1783             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1784             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1785             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1786             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1787             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1788                           parms->base_reachable_time) ||
1789             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1790             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1791                           parms->delay_probe_time) ||
1792             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1793             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1794             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1795             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1796                 goto nla_put_failure;
1797         return nla_nest_end(skb, nest);
1798
1799 nla_put_failure:
1800         nla_nest_cancel(skb, nest);
1801         return -EMSGSIZE;
1802 }
1803
/* Build one RTM_NEWNEIGHTBL message describing table @tbl: name, GC
 * thresholds/interval, an NDTA_CONFIG snapshot, summed per-cpu
 * NDTA_STATS, and the table's default parameter set.
 *
 * Returns nlmsg_end() (message length) on success, or -EMSGSIZE if the
 * skb ran out of room (the partial message is cancelled).
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
                              u32 pid, u32 seq, int type, int flags)
{
        struct nlmsghdr *nlh;
        struct ndtmsg *ndtmsg;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndtmsg = nlmsg_data(nlh);

        /* hold tbl->lock so all values below form a consistent snapshot */
        read_lock_bh(&tbl->lock);
        ndtmsg->ndtm_family = tbl->family;
        ndtmsg->ndtm_pad1   = 0;
        ndtmsg->ndtm_pad2   = 0;

        if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
            nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
            nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
            nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
            nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
                goto nla_put_failure;
        {
                /* NDTA_CONFIG: static geometry plus jiffies-relative ages */
                unsigned long now = jiffies;
                unsigned int flush_delta = now - tbl->last_flush;
                unsigned int rand_delta = now - tbl->last_rand;
                struct neigh_hash_table *nht;
                struct ndt_config ndc = {
                        .ndtc_key_len           = tbl->key_len,
                        .ndtc_entry_size        = tbl->entry_size,
                        .ndtc_entries           = atomic_read(&tbl->entries),
                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
                };

                /* hash table pointer is RCU-managed; sample it safely */
                rcu_read_lock_bh();
                nht = rcu_dereference_bh(tbl->nht);
                ndc.ndtc_hash_rnd = nht->hash_rnd[0];
                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
                rcu_read_unlock_bh();

                if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
                        goto nla_put_failure;
        }

        {
                /* NDTA_STATS: fold the per-cpu counters into one struct */
                int cpu;
                struct ndt_stats ndst;

                memset(&ndst, 0, sizeof(ndst));

                for_each_possible_cpu(cpu) {
                        struct neigh_statistics *st;

                        st = per_cpu_ptr(tbl->stats, cpu);
                        ndst.ndts_allocs                += st->allocs;
                        ndst.ndts_destroys              += st->destroys;
                        ndst.ndts_hash_grows            += st->hash_grows;
                        ndst.ndts_res_failed            += st->res_failed;
                        ndst.ndts_lookups               += st->lookups;
                        ndst.ndts_hits                  += st->hits;
                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
                }

                if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
                        goto nla_put_failure;
        }

        /* the table's default parms must not be bound to a device */
        BUG_ON(tbl->parms.dev);
        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
                goto nla_put_failure;

        read_unlock_bh(&tbl->lock);
        return nlmsg_end(skb, nlh);

nla_put_failure:
        read_unlock_bh(&tbl->lock);
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
1889
1890 static int neightbl_fill_param_info(struct sk_buff *skb,
1891                                     struct neigh_table *tbl,
1892                                     struct neigh_parms *parms,
1893                                     u32 pid, u32 seq, int type,
1894                                     unsigned int flags)
1895 {
1896         struct ndtmsg *ndtmsg;
1897         struct nlmsghdr *nlh;
1898
1899         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1900         if (nlh == NULL)
1901                 return -EMSGSIZE;
1902
1903         ndtmsg = nlmsg_data(nlh);
1904
1905         read_lock_bh(&tbl->lock);
1906         ndtmsg->ndtm_family = tbl->family;
1907         ndtmsg->ndtm_pad1   = 0;
1908         ndtmsg->ndtm_pad2   = 0;
1909
1910         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1911             neightbl_fill_parms(skb, parms) < 0)
1912                 goto errout;
1913
1914         read_unlock_bh(&tbl->lock);
1915         return nlmsg_end(skb, nlh);
1916 errout:
1917         read_unlock_bh(&tbl->lock);
1918         nlmsg_cancel(skb, nlh);
1919         return -EMSGSIZE;
1920 }
1921
/* Attribute validation policy for RTM_SETNEIGHTBL (table-level knobs). */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
        [NDTA_NAME]             = { .type = NLA_STRING },
        [NDTA_THRESH1]          = { .type = NLA_U32 },
        [NDTA_THRESH2]          = { .type = NLA_U32 },
        [NDTA_THRESH3]          = { .type = NLA_U32 },
        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
        [NDTA_PARMS]            = { .type = NLA_NESTED },
};
1930
/* Validation policy for the nested NDTA_PARMS attributes (U64 entries
 * carry millisecond values, read with nla_get_msecs()). */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
};
1946
/* RTM_SETNEIGHTBL handler: update a neighbour table's parameters
 * and/or GC thresholds from a netlink request.
 *
 * The table is selected by NDTA_NAME (mandatory), optionally narrowed
 * by family.  Returns 0 on success; -EINVAL for a malformed request;
 * -ENOENT when no table/parms match, or when a non-init netns tries to
 * change the global GC thresholds.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        struct net *net = sock_net(skb->sk);
        struct neigh_table *tbl;
        struct ndtmsg *ndtmsg;
        struct nlattr *tb[NDTA_MAX+1];
        int err;

        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
                          nl_neightbl_policy);
        if (err < 0)
                goto errout;

        if (tb[NDTA_NAME] == NULL) {
                err = -EINVAL;
                goto errout;
        }

        /* find the table by (optional) family and name; the list lock is
         * held until all updates are done */
        ndtmsg = nlmsg_data(nlh);
        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
                        continue;

                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
                        break;
        }

        if (tbl == NULL) {
                err = -ENOENT;
                goto errout_locked;
        }

        /*
         * We acquire tbl->lock to be nice to the periodic timers and
         * make sure they always see a consistent set of values.
         */
        write_lock_bh(&tbl->lock);

        if (tb[NDTA_PARMS]) {
                struct nlattr *tbp[NDTPA_MAX+1];
                struct neigh_parms *p;
                int i, ifindex = 0;

                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
                                       nl_ntbl_parm_policy);
                if (err < 0)
                        goto errout_tbl_lock;

                /* ifindex 0 selects the table's default parameter set */
                if (tbp[NDTPA_IFINDEX])
                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

                p = lookup_neigh_parms(tbl, net, ifindex);
                if (p == NULL) {
                        err = -ENOENT;
                        goto errout_tbl_lock;
                }

                /* apply each supplied attribute to the parameter set */
                for (i = 1; i <= NDTPA_MAX; i++) {
                        if (tbp[i] == NULL)
                                continue;

                        switch (i) {
                        case NDTPA_QUEUE_LEN:
                                /* legacy packet count: convert to bytes */
                                p->queue_len_bytes = nla_get_u32(tbp[i]) *
                                                     SKB_TRUESIZE(ETH_FRAME_LEN);
                                break;
                        case NDTPA_QUEUE_LENBYTES:
                                p->queue_len_bytes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_PROXY_QLEN:
                                p->proxy_qlen = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_APP_PROBES:
                                p->app_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_UCAST_PROBES:
                                p->ucast_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_MCAST_PROBES:
                                p->mcast_probes = nla_get_u32(tbp[i]);
                                break;
                        case NDTPA_BASE_REACHABLE_TIME:
                                p->base_reachable_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_GC_STALETIME:
                                p->gc_staletime = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_DELAY_PROBE_TIME:
                                p->delay_probe_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_RETRANS_TIME:
                                p->retrans_time = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_ANYCAST_DELAY:
                                p->anycast_delay = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_PROXY_DELAY:
                                p->proxy_delay = nla_get_msecs(tbp[i]);
                                break;
                        case NDTPA_LOCKTIME:
                                p->locktime = nla_get_msecs(tbp[i]);
                                break;
                        }
                }
        }

        /* GC thresholds and interval are global: only init_net may set them */
        err = -ENOENT;
        if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
             tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
            !net_eq(net, &init_net))
                goto errout_tbl_lock;

        if (tb[NDTA_THRESH1])
                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

        if (tb[NDTA_THRESH2])
                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

        if (tb[NDTA_THRESH3])
                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

        if (tb[NDTA_GC_INTERVAL])
                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

        err = 0;

errout_tbl_lock:
        write_unlock_bh(&tbl->lock);
errout_locked:
        read_unlock(&neigh_tbl_lock);
errout:
        return err;
}
2081
/* RTM_GETNEIGHTBL dump: emit one message per table plus one per
 * per-device parameter set visible in the requesting netns.
 *
 * Resume state across dump invocations lives in cb->args[0] (table
 * index) and cb->args[1] (parms index within that table).
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int family, tidx, nidx = 0;
        int tbl_skip = cb->args[0];
        int neigh_skip = cb->args[1];
        struct neigh_table *tbl;

        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
                struct neigh_parms *p;

                if (tidx < tbl_skip || (family && tbl->family != family))
                        continue;

                /* table-level message first; <= 0 means the skb is full */
                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
                                       NLM_F_MULTI) <= 0)
                        break;

                /* then one message per device-specific parameter set;
                 * tbl->parms (the default set) is skipped by starting at
                 * tbl->parms.next */
                for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
                        if (!net_eq(neigh_parms_net(p), net))
                                continue;

                        if (nidx < neigh_skip)
                                goto next;

                        if (neightbl_fill_param_info(skb, tbl, p,
                                                     NETLINK_CB(cb->skb).portid,
                                                     cb->nlh->nlmsg_seq,
                                                     RTM_NEWNEIGHTBL,
                                                     NLM_F_MULTI) <= 0)
                                goto out;
                next:
                        nidx++;
                }

                /* later tables start their parms walk from the beginning */
                neigh_skip = 0;
        }
out:
        read_unlock(&neigh_tbl_lock);
        cb->args[0] = tidx;
        cb->args[1] = nidx;

        return skb->len;
}
2130
/* Fill one RTM_NEWNEIGH message describing neighbour @neigh: header
 * fields, NDA_DST key, NDA_LLADDR (when the entry is NUD_VALID),
 * NDA_PROBES and NDA_CACHEINFO timing data.
 *
 * Returns nlmsg_end() on success or -EMSGSIZE when the skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
                           u32 pid, u32 seq, int type, unsigned int flags)
{
        unsigned long now = jiffies;
        struct nda_cacheinfo ci;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
        if (nlh == NULL)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family  = neigh->ops->family;
        ndm->ndm_pad1    = 0;
        ndm->ndm_pad2    = 0;
        ndm->ndm_flags   = neigh->flags;
        ndm->ndm_type    = neigh->type;
        ndm->ndm_ifindex = neigh->dev->ifindex;

        if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
                goto nla_put_failure;

        /* state, lladdr and timestamps must be read under neigh->lock */
        read_lock_bh(&neigh->lock);
        ndm->ndm_state   = neigh->nud_state;
        if (neigh->nud_state & NUD_VALID) {
                char haddr[MAX_ADDR_LEN];

                /* copy the hw address to a local buffer so the lock can be
                 * dropped before returning on failure */
                neigh_ha_snapshot(haddr, neigh, neigh->dev);
                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
                        read_unlock_bh(&neigh->lock);
                        goto nla_put_failure;
                }
        }

        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
        ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
        read_unlock_bh(&neigh->lock);

        if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
            nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2182
2183 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2184                             u32 pid, u32 seq, int type, unsigned int flags,
2185                             struct neigh_table *tbl)
2186 {
2187         struct nlmsghdr *nlh;
2188         struct ndmsg *ndm;
2189
2190         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2191         if (nlh == NULL)
2192                 return -EMSGSIZE;
2193
2194         ndm = nlmsg_data(nlh);
2195         ndm->ndm_family  = tbl->family;
2196         ndm->ndm_pad1    = 0;
2197         ndm->ndm_pad2    = 0;
2198         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2199         ndm->ndm_type    = NDA_DST;
2200         ndm->ndm_ifindex = pn->dev->ifindex;
2201         ndm->ndm_state   = NUD_NONE;
2202
2203         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2204                 goto nla_put_failure;
2205
2206         return nlmsg_end(skb, nlh);
2207
2208 nla_put_failure:
2209         nlmsg_cancel(skb, nlh);
2210         return -EMSGSIZE;
2211 }
2212
/* Propagate a neighbour change: in-kernel netevent notifier chain
 * first, then an RTM_NEWNEIGH netlink notification to userspace. */
static void neigh_update_notify(struct neighbour *neigh)
{
        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
        __neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2218
/* Dump every neighbour of @tbl visible in the requester's netns as
 * RTM_NEWNEIGH messages, walking the RCU-protected hash table.
 *
 * Resume position is kept in cb->args[1] (bucket) and cb->args[2]
 * (index within bucket).  Returns skb->len when the walk completed,
 * or -1 when the skb filled up (dump will be resumed).
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                            struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct neighbour *n;
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
        struct neigh_hash_table *nht;

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        for (h = s_h; h < (1 << nht->hash_shift); h++) {
                /* only the resumed bucket honours the saved index */
                if (h > s_h)
                        s_idx = 0;
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
                        if (!net_eq(dev_net(n->dev), net))
                                continue;
                        if (idx < s_idx)
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
                                            RTM_NEWNEIGH,
                                            NLM_F_MULTI) <= 0) {
                                rc = -1;
                                goto out;
                        }
next:
                        idx++;
                }
        }
        rc = skb->len;
out:
        rcu_read_unlock_bh();
        /* save position for the next dump invocation */
        cb->args[1] = h;
        cb->args[2] = idx;
        return rc;
}
2259
2260 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2261                              struct netlink_callback *cb)
2262 {
2263         struct pneigh_entry *n;
2264         struct net *net = sock_net(skb->sk);
2265         int rc, h, s_h = cb->args[3];
2266         int idx, s_idx = idx = cb->args[4];
2267
2268         read_lock_bh(&tbl->lock);
2269
2270         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2271                 if (h > s_h)
2272                         s_idx = 0;
2273                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2274                         if (dev_net(n->dev) != net)
2275                                 continue;
2276                         if (idx < s_idx)
2277                                 goto next;
2278                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2279                                             cb->nlh->nlmsg_seq,
2280                                             RTM_NEWNEIGH,
2281                                             NLM_F_MULTI, tbl) <= 0) {
2282                                 read_unlock_bh(&tbl->lock);
2283                                 rc = -1;
2284                                 goto out;
2285                         }
2286                 next:
2287                         idx++;
2288                 }
2289         }
2290
2291         read_unlock_bh(&tbl->lock);
2292         rc = skb->len;
2293 out:
2294         cb->args[3] = h;
2295         cb->args[4] = idx;
2296         return rc;
2297
2298 }
2299
/* RTM_GETNEIGH dump entry point: walk all neighbour tables matching
 * the requested family and dump either proxy entries (when the request
 * carries a full ndmsg with NTF_PROXY set) or regular neighbours.
 *
 * cb->args[0] holds the table index for resumption; args[1..4] belong
 * to the per-table dump helpers and are reset when moving to a new
 * table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct neigh_table *tbl;
        int t, family, s_t;
        int proxy = 0;
        int err;

        read_lock(&neigh_tbl_lock);
        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

        /* check for full ndmsg structure presence, family member is
         * the same for both structures
         */
        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
                proxy = 1;

        s_t = cb->args[0];

        for (tbl = neigh_tables, t = 0; tbl;
             tbl = tbl->next, t++) {
                if (t < s_t || (family && tbl->family != family))
                        continue;
                /* entering a new table: clear the helpers' resume state */
                if (t > s_t)
                        memset(&cb->args[1], 0, sizeof(cb->args) -
                                                sizeof(cb->args[0]));
                if (proxy)
                        err = pneigh_dump_table(tbl, skb, cb);
                else
                        err = neigh_dump_table(tbl, skb, cb);
                if (err < 0)
                        break;
        }
        read_unlock(&neigh_tbl_lock);

        cb->args[0] = t;
        return skb->len;
}
2338
/* Invoke @cb(n, @cookie) for every neighbour entry in @tbl.
 *
 * Walks the hash table under rcu_read_lock_bh(); tbl->lock is
 * additionally read-held to prevent a concurrent resize swapping the
 * bucket array out from under the walk. */
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
        int chain;
        struct neigh_hash_table *nht;

        rcu_read_lock_bh();
        nht = rcu_dereference_bh(tbl->nht);

        read_lock(&tbl->lock); /* avoid resizes */
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;

                for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
                     n != NULL;
                     n = rcu_dereference_bh(n->next))
                        cb(n, cookie);
        }
        read_unlock(&tbl->lock);
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);
2360
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every entry in @tbl and unlink/release those for which @cb
 * returns nonzero.  Unlinked entries are marked dead and handed to
 * neigh_cleanup_and_release() after their own lock is dropped. */
void __neigh_for_each_release(struct neigh_table *tbl,
                              int (*cb)(struct neighbour *))
{
        int chain;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                /* np tracks the link pointing at the current entry so a
                 * release can splice it out of the chain */
                np = &nht->hash_buckets[chain];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        int release;

                        write_lock(&n->lock);
                        release = cb(n);
                        if (release) {
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                        } else
                                np = &n->next;
                        write_unlock(&n->lock);
                        /* drop the table's reference outside n->lock */
                        if (release)
                                neigh_cleanup_and_release(n);
                }
        }
}
EXPORT_SYMBOL(__neigh_for_each_release);
2395
2396 #ifdef CONFIG_PROC_FS
2397
/* seq_file iterator: return the first neighbour entry (in hash order)
 * that belongs to this netns and passes the state/sub-iterator
 * filters, updating state->bucket.  Returns NULL when no entry
 * qualifies.  Caller holds rcu_read_lock_bh() (taken in seq start). */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;
        struct neighbour *n = NULL;
        /* NOTE(review): this initializer is dead — the for loop below
         * restarts the scan at bucket 0 unconditionally */
        int bucket = state->bucket;

        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
                n = rcu_dereference_bh(nht->hash_buckets[bucket]);

                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                /* let the protocol-specific sub-iterator
                                 * decide whether this entry is visible */
                                loff_t fakep = 0;
                                void *v;

                                v = state->neigh_sub_iter(state, n, &fakep);
                                if (!v)
                                        goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;
                        /* with SKIP_NOARP, only entries that left NUD_NOARP */
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;
        }
        state->bucket = bucket;

        return n;
}
2436
/* seq_file iterator: advance from entry @n to the next qualifying
 * neighbour, crossing bucket boundaries as needed.  When @pos is
 * non-NULL it is decremented for each entry consumed (used by
 * neigh_get_idx to skip to a position).  Returns NULL at the end.
 * Caller holds rcu_read_lock_bh(). */
static struct neighbour *neigh_get_next(struct seq_file *seq,
                                        struct neighbour *n,
                                        loff_t *pos)
{
        struct neigh_seq_state *state = seq->private;
        struct net *net = seq_file_net(seq);
        struct neigh_hash_table *nht = state->nht;

        if (state->neigh_sub_iter) {
                /* give the sub-iterator a chance to stay on this entry */
                void *v = state->neigh_sub_iter(state, n, pos);
                if (v)
                        return n;
        }
        n = rcu_dereference_bh(n->next);

        while (1) {
                while (n) {
                        if (!net_eq(dev_net(n->dev), net))
                                goto next;
                        if (state->neigh_sub_iter) {
                                void *v = state->neigh_sub_iter(state, n, pos);
                                if (v)
                                        return n;
                                goto next;
                        }
                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
                                break;

                        /* with SKIP_NOARP, only entries that left NUD_NOARP */
                        if (n->nud_state & ~NUD_NOARP)
                                break;
next:
                        n = rcu_dereference_bh(n->next);
                }

                if (n)
                        break;

                /* current bucket exhausted: move on to the next one */
                if (++state->bucket >= (1 << nht->hash_shift))
                        break;

                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
        }

        if (n && pos)
                --(*pos);
        return n;
}
2484
2485 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2486 {
2487         struct neighbour *n = neigh_get_first(seq);
2488
2489         if (n) {
2490                 --(*pos);
2491                 while (*pos) {
2492                         n = neigh_get_next(seq, n, pos);
2493                         if (!n)
2494                                 break;
2495                 }
2496         }
2497         return *pos ? NULL : n;
2498 }
2499
2500 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2501 {
2502         struct neigh_seq_state *state = seq->private;
2503         struct net *net = seq_file_net(seq);
2504         struct neigh_table *tbl = state->tbl;
2505         struct pneigh_entry *pn = NULL;
2506         int bucket = state->bucket;
2507
2508         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2509         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2510                 pn = tbl->phash_buckets[bucket];
2511                 while (pn && !net_eq(pneigh_net(pn), net))
2512                         pn = pn->next;
2513                 if (pn)
2514                         break;
2515         }
2516         state->bucket = bucket;
2517
2518         return pn;
2519 }
2520
/* Return the proxy-neighbour entry after @pn, skipping entries from other
 * network namespaces and advancing state->bucket across chains.  When @pos
 * is non-NULL it is decremented for each entry returned (countdown seek,
 * same convention as neigh_get_next()).
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	/* Advance within the current chain to the next same-netns entry. */
	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	/* Current chain exhausted: scan subsequent buckets. */
	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}
2548
2549 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2550 {
2551         struct pneigh_entry *pn = pneigh_get_first(seq);
2552
2553         if (pn) {
2554                 --(*pos);
2555                 while (*pos) {
2556                         pn = pneigh_get_next(seq, pn, pos);
2557                         if (!pn)
2558                                 break;
2559                 }
2560         }
2561         return *pos ? NULL : pn;
2562 }
2563
2564 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2565 {
2566         struct neigh_seq_state *state = seq->private;
2567         void *rc;
2568         loff_t idxpos = *pos;
2569
2570         rc = neigh_get_idx(seq, &idxpos);
2571         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2572                 rc = pneigh_get_idx(seq, &idxpos);
2573
2574         return rc;
2575 }
2576
/* seq_file ->start for protocol /proc neighbour listings (arp, ndisc, ...).
 * Takes rcu_read_lock_bh() for the whole dump; released in neigh_seq_stop().
 * Returns SEQ_START_TOKEN for position 0 (header line), otherwise seeks to
 * the requested position via neigh_get_idx_any().
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is iterator-internal state; never honour it from callers. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2592
/* seq_file ->next: step from @v to the following element.  Transitions from
 * the header token to the first neighbour, then from the neighbour table to
 * the pneigh table once the former is exhausted.  *pos is always incremented,
 * as the seq_file API requires.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Neighbour table done; fall through to proxy entries. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* IS_PNEIGH can only be set if pneigh iteration is allowed. */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2619
/* seq_file ->stop: drop the RCU-bh read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2626
2627 /* statistics via seq_file */
2628
/* seq_file ->start for /proc/net/stat/<tbl>: position 0 is the header line;
 * positions >= 1 map to possible CPUs (cpu = *pos - 1), returning that CPU's
 * per-cpu statistics block.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		/* Record the resume position: one past the CPU just shown. */
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
2645
2646 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2647 {
2648         struct neigh_table *tbl = seq->private;
2649         int cpu;
2650
2651         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2652                 if (!cpu_possible(cpu))
2653                         continue;
2654                 *pos = cpu+1;
2655                 return per_cpu_ptr(tbl->stats, cpu);
2656         }
2657         return NULL;
2658 }
2659
/* seq_file ->stop: nothing to release — per-cpu stats need no locking here. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2664
/* seq_file ->show: print the header line for SEQ_START_TOKEN, otherwise one
 * row of per-cpu neighbour statistics.  The column layout is user-visible
 * ABI (/proc/net/stat/<tbl>); do not change the format strings.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	/* "entries" is table-global; the remaining counters are per-cpu. */
	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2698
/* seq_file operations for the per-table /proc/net/stat/<tbl> file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2705
2706 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2707 {
2708         int ret = seq_open(file, &neigh_stat_seq_ops);
2709
2710         if (!ret) {
2711                 struct seq_file *sf = file->private_data;
2712                 sf->private = PDE_DATA(inode);
2713         }
2714         return ret;
2715 };
2716
/* File operations for /proc/net/stat/<tbl>; plain seq_file plumbing. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2724
2725 #endif /* CONFIG_PROC_FS */
2726
2727 static inline size_t neigh_nlmsg_size(void)
2728 {
2729         return NLMSG_ALIGN(sizeof(struct ndmsg))
2730                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2731                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2732                + nla_total_size(sizeof(struct nda_cacheinfo))
2733                + nla_total_size(4); /* NDA_PROBES */
2734 }
2735
/* Broadcast an RTM_* netlink notification for neighbour @n on RTNLGRP_NEIGH.
 * Best-effort: on allocation or fill failure the error is recorded on the
 * netns socket error state instead of being returned.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2759
#ifdef CONFIG_ARPD
/* Ask a userspace ARP daemon (arpd) to resolve @n: emits an RTM_GETNEIGH
 * request-style notification on RTNLGRP_NEIGH.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2767
2768 #ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the proc_dointvec_minmax handlers below. */
static int zero;
static int int_max = INT_MAX;
/* Cap unres_qlen (packets) so the bytes conversion cannot overflow an int. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2772
/* sysctl handler for the legacy "unres_qlen" knob: presents the byte-based
 * queue_len_bytes value as a packet count, converting with the true-size of
 * an ethernet frame in both directions.
 * NOTE(review): the read path uses integer division, so values written are
 * rounded down when re-read (e.g. small byte values can read back as 0) —
 * presumed acceptable legacy behaviour; confirm before changing.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	/* Route the user through a shadow table bounded in packets. */
	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2790
/* Indices into neigh_sysctl_template.neigh_vars[].  Entries up to LOCKTIME
 * and the *_MS variants are per-parms; the GC_* entries at the end are
 * table-global and are clipped off for per-device tables (see
 * neigh_sysctl_register()).
 */
enum {
	NEIGH_VAR_MCAST_PROBE,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2813
/* Template sysctl table for net/<proto>/neigh/<dev|default>/.  A copy is
 * kmemdup'd per registration and its .data pointers are patched to point at
 * the caller's neigh_parms (see neigh_sysctl_register()); the extra slot
 * beyond NEIGH_VAR_MAX is the {} sentinel terminating the table.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		[NEIGH_VAR_MCAST_PROBE] = {
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_UCAST_PROBE] = {
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_APP_PROBE] = {
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_RETRANS_TIME] = {
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_DELAY_PROBE_TIME] = {
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_STALETIME] = {
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_QUEUE_LEN] = {
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			/* legacy packet-count view of unres_qlen_bytes */
			.proc_handler	= proc_unres_qlen,
		},
		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
			.procname	= "unres_qlen_bytes",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.proc_handler   = proc_dointvec_minmax,
		},
		[NEIGH_VAR_PROXY_QLEN] = {
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_ANYCAST_DELAY] = {
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_PROXY_DELAY] = {
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_LOCKTIME] = {
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_userhz_jiffies,
		},
		[NEIGH_VAR_RETRANS_TIME_MS] = {
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_ms_jiffies,
		},
		/* Table-global knobs below; removed for per-device tables. */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1 	= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
2951
/* Register the per-parms neighbour sysctl tree at
 * net/<p_name>/neigh/<dev-name|default>/.  Copies the template, points each
 * entry's .data at @p's fields, and optionally overrides the four time knobs
 * with a protocol-supplied @handler.  Returns 0 or -ENOBUFS.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  char *p_name, proc_handler *handler)
{
	struct neigh_sysctl_table *t;
	const char *dev_name_source = NULL;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Rebind every template slot to this neigh_parms instance.  Note
	 * both "unres_qlen" and "unres_qlen_bytes" intentionally share
	 * queue_len_bytes: the former is the packet-count view converted by
	 * proc_unres_qlen().
	 */
	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		/* Default table also exposes the four gc_* ints, which are
		 * presumed to be laid out directly after the neigh_parms
		 * struct — NOTE(review): relies on the allocation layout in
		 * neigh_table_init(); confirm before reusing elsewhere.
		 */
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}


	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3028
3029 void neigh_sysctl_unregister(struct neigh_parms *p)
3030 {
3031         if (p->sysctl_table) {
3032                 struct neigh_sysctl_table *t = p->sysctl_table;
3033                 p->sysctl_table = NULL;
3034                 unregister_net_sysctl_table(t->sysctl_header);
3035                 kfree(t);
3036         }
3037 }
3038 EXPORT_SYMBOL(neigh_sysctl_unregister);
3039
3040 #endif  /* CONFIG_SYSCTL */
3041
/* Register the rtnetlink handlers for neighbour and neighbour-table
 * messages.  Runs at subsys_initcall time so protocols registering their
 * tables later find the netlink plumbing in place.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3056