]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blob - net/core/neighbour.c
Merge branch 'akpm' (Andrew's patch-bomb)
[can-eth-gw-linux.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
/* Debug verbosity: 0 = silent, 1 = level-1 messages, 2 = level-1 and level-2. */
#define NEIGH_DEBUG 1

/* Back ends: a real printk() and a statement that expands to nothing. */
#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

/* NEIGH_PRINTKn prints only when NEIGH_DEBUG is at least n. */
#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif
57
/* Mask applied by pneigh_hash(); proxy bucket indices are 0..PNEIGH_HASHMASK. */
#define PNEIGH_HASHMASK         0xF

/* Forward declarations for helpers that are used before they are defined. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

/* NOTE(review): presumably the head of the table list guarded by
 * neigh_tbl_lock -- confirm against the registration code.
 */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
69
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     Doing so will result in deadlocks if a backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */

static DEFINE_RWLOCK(neigh_tbl_lock);
102
103 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
104 {
105         kfree_skb(skb);
106         return -ENETDOWN;
107 }
108
109 static void neigh_cleanup_and_release(struct neighbour *neigh)
110 {
111         if (neigh->parms->neigh_cleanup)
112                 neigh->parms->neigh_cleanup(neigh);
113
114         __neigh_notify(neigh, RTM_DELNEIGH, 0);
115         neigh_release(neigh);
116 }
117
/*
 * Pick a random value in the interval (1/2)*base ... (3/2)*base.
 * This corresponds to the default IPv6 settings and is deliberately not
 * overridable, because it is a really reasonable choice.
 *
 * A @base of 0 yields 0 (and avoids a modulo by zero).
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
        if (!base)
                return 0;

        return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
129
130
/*
 * Synchronous garbage collection over the whole hash table.
 *
 * With tbl->lock write-held (BHs disabled), walks every bucket and unlinks
 * each entry whose reference count is exactly 1 and whose state does not
 * include NUD_PERMANENT.  Such entries are marked dead and handed to
 * neigh_cleanup_and_release().  tbl->last_flush is updated on every run.
 *
 * Returns 1 if at least one entry was unlinked, 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
        int shrunk = 0;
        int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));
        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np;

                /* np always points at the link that refers to n, so an
                 * unlink is a single store through it.
                 */
                np = &nht->hash_buckets[i];
                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Neighbour record may be discarded if:
                         * - nobody refers to it.
                         * - it is not permanent
                         */
                        write_lock(&n->lock);
                        if (atomic_read(&n->refcnt) == 1 &&
                            !(n->nud_state & NUD_PERMANENT)) {
                                rcu_assign_pointer(*np,
                                        rcu_dereference_protected(n->next,
                                                  lockdep_is_held(&tbl->lock)));
                                n->dead = 1;
                                shrunk  = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                /* *np now refers to the successor; do not advance. */
                                continue;
                        }
                        write_unlock(&n->lock);
                        np = &n->next;
                }
        }

        tbl->last_flush = jiffies;

        write_unlock_bh(&tbl->lock);

        return shrunk;
}
176
177 static void neigh_add_timer(struct neighbour *n, unsigned long when)
178 {
179         neigh_hold(n);
180         if (unlikely(mod_timer(&n->timer, when))) {
181                 printk("NEIGH: BUG, double timer add, state is %x\n",
182                        n->nud_state);
183                 dump_stack();
184         }
185 }
186
187 static int neigh_del_timer(struct neighbour *n)
188 {
189         if ((n->nud_state & NUD_IN_TIMER) &&
190             del_timer(&n->timer)) {
191                 neigh_release(n);
192                 return 1;
193         }
194         return 0;
195 }
196
197 static void pneigh_queue_purge(struct sk_buff_head *list)
198 {
199         struct sk_buff *skb;
200
201         while ((skb = skb_dequeue(list)) != NULL) {
202                 dev_put(skb->dev);
203                 kfree_skb(skb);
204         }
205 }
206
/*
 * Unlink every entry of @tbl that belongs to @dev, or every entry at all
 * when @dev is NULL.
 *
 * The lockdep annotations expect tbl->lock to be held; the callers visible
 * in this file take it with write_lock_bh() before calling.
 *
 * Each matching entry is removed from its bucket, has its timer cancelled,
 * is marked dead and is passed to neigh_cleanup_and_release().  Entries that
 * are still referenced elsewhere are additionally neutralised (queue
 * purged, output redirected to neigh_blackhole, state downgraded).
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
        int i;
        struct neigh_hash_table *nht;

        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        for (i = 0; i < (1 << nht->hash_shift); i++) {
                struct neighbour *n;
                struct neighbour __rcu **np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                        lockdep_is_held(&tbl->lock))) != NULL) {
                        /* Entry belongs to another device: keep it. */
                        if (dev && n->dev != dev) {
                                np = &n->next;
                                continue;
                        }
                        /* Unlink first; *np then refers to the successor. */
                        rcu_assign_pointer(*np,
                                   rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock)));
                        write_lock(&n->lock);
                        neigh_del_timer(n);
                        n->dead = 1;

                        if (atomic_read(&n->refcnt) != 1) {
                                /* The most unpleasant situation.
                                   We must destroy neighbour entry,
                                   but someone still uses it.

                                   The destroy will be delayed until
                                   the last user releases us, but
                                   we must kill timers etc. and move
                                   it to safe state.
                                 */
                                skb_queue_purge(&n->arp_queue);
                                n->arp_queue_len_bytes = 0;
                                n->output = neigh_blackhole;
                                if (n->nud_state & NUD_VALID)
                                        n->nud_state = NUD_NOARP;
                                else
                                        n->nud_state = NUD_NONE;
                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
                        }
                        write_unlock(&n->lock);
                        neigh_cleanup_and_release(n);
                }
        }
}
256
/*
 * Flush all entries of @tbl that belong to @dev (all entries if @dev is
 * NULL), taking tbl->lock for writing around neigh_flush_dev().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
264
/*
 * Drop everything @tbl holds for @dev: regular entries and proxy entries
 * are removed under tbl->lock, then the proxy timer is stopped and the
 * proxy queue is purged.
 *
 * Note that the proxy timer and proxy queue are torn down for the whole
 * table, not only for @dev.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
        write_lock_bh(&tbl->lock);
        neigh_flush_dev(tbl, dev);
        pneigh_ifdown(tbl, dev);
        write_unlock_bh(&tbl->lock);

        /* Done outside tbl->lock: del_timer_sync() waits for the handler. */
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
277
278 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
279 {
280         struct neighbour *n = NULL;
281         unsigned long now = jiffies;
282         int entries;
283
284         entries = atomic_inc_return(&tbl->entries) - 1;
285         if (entries >= tbl->gc_thresh3 ||
286             (entries >= tbl->gc_thresh2 &&
287              time_after(now, tbl->last_flush + 5 * HZ))) {
288                 if (!neigh_forced_gc(tbl) &&
289                     entries >= tbl->gc_thresh3)
290                         goto out_entries;
291         }
292
293         if (tbl->entry_size)
294                 n = kzalloc(tbl->entry_size, GFP_ATOMIC);
295         else {
296                 int sz = sizeof(*n) + tbl->key_len;
297
298                 sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
299                 sz += dev->neigh_priv_len;
300                 n = kzalloc(sz, GFP_ATOMIC);
301         }
302         if (!n)
303                 goto out_entries;
304
305         skb_queue_head_init(&n->arp_queue);
306         rwlock_init(&n->lock);
307         seqlock_init(&n->ha_lock);
308         n->updated        = n->used = now;
309         n->nud_state      = NUD_NONE;
310         n->output         = neigh_blackhole;
311         seqlock_init(&n->hh.hh_lock);
312         n->parms          = neigh_parms_clone(&tbl->parms);
313         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
314
315         NEIGH_CACHE_STAT_INC(tbl, allocs);
316         n->tbl            = tbl;
317         atomic_set(&n->refcnt, 1);
318         n->dead           = 1;
319 out:
320         return n;
321
322 out_entries:
323         atomic_dec(&tbl->entries);
324         goto out;
325 }
326
327 static void neigh_get_hash_rnd(u32 *x)
328 {
329         get_random_bytes(x, sizeof(*x));
330         *x |= 1;
331 }
332
333 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
334 {
335         size_t size = (1 << shift) * sizeof(struct neighbour *);
336         struct neigh_hash_table *ret;
337         struct neighbour __rcu **buckets;
338         int i;
339
340         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
341         if (!ret)
342                 return NULL;
343         if (size <= PAGE_SIZE)
344                 buckets = kzalloc(size, GFP_ATOMIC);
345         else
346                 buckets = (struct neighbour __rcu **)
347                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
348                                            get_order(size));
349         if (!buckets) {
350                 kfree(ret);
351                 return NULL;
352         }
353         ret->hash_buckets = buckets;
354         ret->hash_shift = shift;
355         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
356                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
357         return ret;
358 }
359
360 static void neigh_hash_free_rcu(struct rcu_head *head)
361 {
362         struct neigh_hash_table *nht = container_of(head,
363                                                     struct neigh_hash_table,
364                                                     rcu);
365         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
366         struct neighbour __rcu **buckets = nht->hash_buckets;
367
368         if (size <= PAGE_SIZE)
369                 kfree(buckets);
370         else
371                 free_pages((unsigned long)buckets, get_order(size));
372         kfree(nht);
373 }
374
/*
 * Replace the table's hash with a new one of (1 << @new_shift) buckets.
 *
 * The lockdep annotations expect tbl->lock to be held.  Every entry is
 * rehashed with the new table's seeds and pushed onto the head of its new
 * bucket; the new table is then published in tbl->nht and the old one is
 * freed after an RCU grace period.
 *
 * Returns the table now in use: the new one, or the old one unchanged when
 * the allocation failed.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
                                                unsigned long new_shift)
{
        unsigned int i, hash;
        struct neigh_hash_table *new_nht, *old_nht;

        NEIGH_CACHE_STAT_INC(tbl, hash_grows);

        old_nht = rcu_dereference_protected(tbl->nht,
                                            lockdep_is_held(&tbl->lock));
        new_nht = neigh_hash_alloc(new_shift);
        if (!new_nht)
                return old_nht;

        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
                struct neighbour *n, *next;

                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
                                                   lockdep_is_held(&tbl->lock));
                     n != NULL;
                     n = next) {
                        hash = tbl->hash(n->primary_key, n->dev,
                                         new_nht->hash_rnd);

                        /* The bucket index is the top hash_shift bits. */
                        hash >>= (32 - new_nht->hash_shift);
                        /* Save the old successor before n->next is rewritten. */
                        next = rcu_dereference_protected(n->next,
                                                lockdep_is_held(&tbl->lock));

                        rcu_assign_pointer(n->next,
                                           rcu_dereference_protected(
                                                new_nht->hash_buckets[hash],
                                                lockdep_is_held(&tbl->lock)));
                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
                }
        }

        rcu_assign_pointer(tbl->nht, new_nht);
        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
        return new_nht;
}
415
416 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
417                                struct net_device *dev)
418 {
419         struct neighbour *n;
420         int key_len = tbl->key_len;
421         u32 hash_val;
422         struct neigh_hash_table *nht;
423
424         NEIGH_CACHE_STAT_INC(tbl, lookups);
425
426         rcu_read_lock_bh();
427         nht = rcu_dereference_bh(tbl->nht);
428         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
429
430         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
431              n != NULL;
432              n = rcu_dereference_bh(n->next)) {
433                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
434                         if (!atomic_inc_not_zero(&n->refcnt))
435                                 n = NULL;
436                         NEIGH_CACHE_STAT_INC(tbl, hits);
437                         break;
438                 }
439         }
440
441         rcu_read_unlock_bh();
442         return n;
443 }
444 EXPORT_SYMBOL(neigh_lookup);
445
446 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
447                                      const void *pkey)
448 {
449         struct neighbour *n;
450         int key_len = tbl->key_len;
451         u32 hash_val;
452         struct neigh_hash_table *nht;
453
454         NEIGH_CACHE_STAT_INC(tbl, lookups);
455
456         rcu_read_lock_bh();
457         nht = rcu_dereference_bh(tbl->nht);
458         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
459
460         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
461              n != NULL;
462              n = rcu_dereference_bh(n->next)) {
463                 if (!memcmp(n->primary_key, pkey, key_len) &&
464                     net_eq(dev_net(n->dev), net)) {
465                         if (!atomic_inc_not_zero(&n->refcnt))
466                                 n = NULL;
467                         NEIGH_CACHE_STAT_INC(tbl, hits);
468                         break;
469                 }
470         }
471
472         rcu_read_unlock_bh();
473         return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup_nodev);
476
/*
 * Create the entry for (@pkey, @dev) in @tbl, or return the one that is
 * already there.
 *
 * A fresh entry is allocated and run through the protocol constructor, the
 * device's ndo_neigh_construct hook and the parms' neigh_setup hook before
 * the table lock is taken.  Under the lock the hash is grown if the entry
 * count exceeds the bucket count, and the bucket is searched for an
 * existing match; if one is found the fresh entry is released and the
 * existing one is returned.
 *
 * @want_ref: when true, an extra reference is taken on the returned entry.
 *
 * Returns the entry, or an ERR_PTR(): -ENOBUFS when allocation fails,
 * -EINVAL when the entry's parms are dead, or the negative error returned
 * by one of the setup hooks.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
                                 struct net_device *dev, bool want_ref)
{
        u32 hash_val;
        int key_len = tbl->key_len;
        int error;
        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
        struct neigh_hash_table *nht;

        if (!n) {
                rc = ERR_PTR(-ENOBUFS);
                goto out;
        }

        memcpy(n->primary_key, pkey, key_len);
        n->dev = dev;
        dev_hold(dev);

        /* Protocol specific setup. */
        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        if (dev->netdev_ops->ndo_neigh_construct) {
                error = dev->netdev_ops->ndo_neigh_construct(n);
                if (error < 0) {
                        rc = ERR_PTR(error);
                        goto out_neigh_release;
                }
        }

        /* Device specific setup. */
        if (n->parms->neigh_setup &&
            (error = n->parms->neigh_setup(n)) < 0) {
                rc = ERR_PTR(error);
                goto out_neigh_release;
        }

        /* Backdate the confirmation by twice base_reachable_time. */
        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /* More entries than buckets: double the table. */
        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
                goto out_tbl_unlock;
        }

        /* Another creator may have inserted the same key already. */
        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
                                            lockdep_is_held(&tbl->lock));
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
                        if (want_ref)
                                neigh_hold(n1);
                        rc = n1;
                        goto out_tbl_unlock;
                }
        }

        /* Insert at the head of the bucket; the entry is live from here on. */
        n->dead = 0;
        if (want_ref)
                neigh_hold(n);
        rcu_assign_pointer(n->next,
                           rcu_dereference_protected(nht->hash_buckets[hash_val],
                                                     lockdep_is_held(&tbl->lock)));
        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK2("neigh %p is created.\n", n);
        rc = n;
out:
        return rc;
out_tbl_unlock:
        write_unlock_bh(&tbl->lock);
out_neigh_release:
        /* n is still marked dead here, so dropping the reference destroys it. */
        neigh_release(n);
        goto out;
}
EXPORT_SYMBOL(__neigh_create);
564
565 static u32 pneigh_hash(const void *pkey, int key_len)
566 {
567         u32 hash_val = *(u32 *)(pkey + key_len - 4);
568         hash_val ^= (hash_val >> 16);
569         hash_val ^= hash_val >> 8;
570         hash_val ^= hash_val >> 4;
571         hash_val &= PNEIGH_HASHMASK;
572         return hash_val;
573 }
574
575 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
576                                               struct net *net,
577                                               const void *pkey,
578                                               int key_len,
579                                               struct net_device *dev)
580 {
581         while (n) {
582                 if (!memcmp(n->key, pkey, key_len) &&
583                     net_eq(pneigh_net(n), net) &&
584                     (n->dev == dev || !n->dev))
585                         return n;
586                 n = n->next;
587         }
588         return NULL;
589 }
590
/*
 * Look up a proxy entry without taking tbl->lock and without creating one.
 * NOTE(review): no locking is done here, so the caller is presumably
 * responsible for keeping the bucket chain stable -- confirm at call sites.
 *
 * Returns the matching entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
                struct net *net, const void *pkey, struct net_device *dev)
{
        int key_len = tbl->key_len;
        u32 hash_val = pneigh_hash(pkey, key_len);

        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
                                 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
601
602 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
603                                     struct net *net, const void *pkey,
604                                     struct net_device *dev, int creat)
605 {
606         struct pneigh_entry *n;
607         int key_len = tbl->key_len;
608         u32 hash_val = pneigh_hash(pkey, key_len);
609
610         read_lock_bh(&tbl->lock);
611         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
612                               net, pkey, key_len, dev);
613         read_unlock_bh(&tbl->lock);
614
615         if (n || !creat)
616                 goto out;
617
618         ASSERT_RTNL();
619
620         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
621         if (!n)
622                 goto out;
623
624         write_pnet(&n->net, hold_net(net));
625         memcpy(n->key, pkey, key_len);
626         n->dev = dev;
627         if (dev)
628                 dev_hold(dev);
629
630         if (tbl->pconstructor && tbl->pconstructor(n)) {
631                 if (dev)
632                         dev_put(dev);
633                 release_net(net);
634                 kfree(n);
635                 n = NULL;
636                 goto out;
637         }
638
639         write_lock_bh(&tbl->lock);
640         n->next = tbl->phash_buckets[hash_val];
641         tbl->phash_buckets[hash_val] = n;
642         write_unlock_bh(&tbl->lock);
643 out:
644         return n;
645 }
646 EXPORT_SYMBOL(pneigh_lookup);
647
648
649 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
650                   struct net_device *dev)
651 {
652         struct pneigh_entry *n, **np;
653         int key_len = tbl->key_len;
654         u32 hash_val = pneigh_hash(pkey, key_len);
655
656         write_lock_bh(&tbl->lock);
657         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
658              np = &n->next) {
659                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
660                     net_eq(pneigh_net(n), net)) {
661                         *np = n->next;
662                         write_unlock_bh(&tbl->lock);
663                         if (tbl->pdestructor)
664                                 tbl->pdestructor(n);
665                         if (n->dev)
666                                 dev_put(n->dev);
667                         release_net(pneigh_net(n));
668                         kfree(n);
669                         return 0;
670                 }
671         }
672         write_unlock_bh(&tbl->lock);
673         return -ENOENT;
674 }
675
676 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
677 {
678         struct pneigh_entry *n, **np;
679         u32 h;
680
681         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
682                 np = &tbl->phash_buckets[h];
683                 while ((n = *np) != NULL) {
684                         if (!dev || n->dev == dev) {
685                                 *np = n->next;
686                                 if (tbl->pdestructor)
687                                         tbl->pdestructor(n);
688                                 if (n->dev)
689                                         dev_put(n->dev);
690                                 release_net(pneigh_net(n));
691                                 kfree(n);
692                                 continue;
693                         }
694                         np = &n->next;
695                 }
696         }
697         return -ENOENT;
698 }
699
/* Defined later in the file; needed by neigh_parms_put() below. */
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on @parms and destroy it when the count reaches zero. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
        if (atomic_dec_and_test(&parms->refcnt))
                neigh_parms_destroy(parms);
}
707
/*
 *      Free a neighbour entry.
 *
 *      The neighbour must already be out of the table (marked dead); an
 *      entry that is not dead is reported with a warning and a stack dump
 *      and is NOT freed.
 *
 *      Otherwise the queue is purged, the device's optional
 *      ndo_neigh_destroy hook is run, the device and parms references are
 *      dropped, tbl->entries is decremented and the memory is released via
 *      kfree_rcu().
 */
void neigh_destroy(struct neighbour *neigh)
{
        struct net_device *dev = neigh->dev;

        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

        if (!neigh->dead) {
                pr_warn("Destroying alive neighbour %p\n", neigh);
                dump_stack();
                return;
        }

        /* A timer that could still be cancelled here is unexpected. */
        if (neigh_del_timer(neigh))
                pr_warn("Impossible event\n");

        skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;

        if (dev->netdev_ops->ndo_neigh_destroy)
                dev->netdev_ops->ndo_neigh_destroy(neigh);

        dev_put(dev);
        neigh_parms_put(neigh->parms);

        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

        atomic_dec(&neigh->tbl->entries);
        kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
742
/* Neighbour state is suspicious;
   disable fast path by switching the output handler to ops->output.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

        neigh->output = neigh->ops->output;
}
754
/* Neighbour state is OK;
   enable fast path by switching the output handler to
   ops->connected_output.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

        neigh->output = neigh->ops->connected_output;
}
766
/*
 * Periodic garbage collector, run from the table's delayed work item.
 *
 * Every 300 seconds it re-randomises reachable_time for every parms on the
 * table's list.  It then walks all buckets and removes entries that are
 * unreferenced (refcnt == 1) and either in NUD_FAILED state or unused for
 * longer than gc_staletime; permanent entries and entries with a timer
 * state are skipped.  tbl->lock is dropped between buckets to allow
 * rescheduling.  Finally the work re-arms itself to run again after
 * base_reachable_time / 2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
        struct neighbour *n;
        struct neighbour __rcu **np;
        unsigned int i;
        struct neigh_hash_table *nht;

        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

        write_lock_bh(&tbl->lock);
        nht = rcu_dereference_protected(tbl->nht,
                                        lockdep_is_held(&tbl->lock));

        /*
         *      periodically recompute ReachableTime from random function
         */

        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
                struct neigh_parms *p;
                tbl->last_rand = jiffies;
                for (p = &tbl->parms; p; p = p->next)
                        p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);
        }

        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
                np = &nht->hash_buckets[i];

                while ((n = rcu_dereference_protected(*np,
                                lockdep_is_held(&tbl->lock))) != NULL) {
                        unsigned int state;

                        write_lock(&n->lock);

                        state = n->nud_state;
                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
                                write_unlock(&n->lock);
                                goto next_elt;
                        }

                        /* "used" must never be older than "confirmed". */
                        if (time_before(n->used, n->confirmed))
                                n->used = n->confirmed;

                        if (atomic_read(&n->refcnt) == 1 &&
                            (state == NUD_FAILED ||
                             time_after(jiffies, n->used + n->parms->gc_staletime))) {
                                /* Unlink; *np now refers to the successor. */
                                *np = n->next;
                                n->dead = 1;
                                write_unlock(&n->lock);
                                neigh_cleanup_and_release(n);
                                continue;
                        }
                        write_unlock(&n->lock);

next_elt:
                        np = &n->next;
                }
                /*
                 * It's fine to release lock here, even if hash table
                 * grows while we are preempted.
                 */
                write_unlock_bh(&tbl->lock);
                cond_resched();
                write_lock_bh(&tbl->lock);
                /* Re-read the table: it may have been replaced meanwhile. */
                nht = rcu_dereference_protected(tbl->nht,
                                                lockdep_is_held(&tbl->lock));
        }
        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
         * base_reachable_time.
         */
        schedule_delayed_work(&tbl->gc_work,
                              tbl->parms.base_reachable_time >> 1);
        write_unlock_bh(&tbl->lock);
}
843
844 static __inline__ int neigh_max_probes(struct neighbour *n)
845 {
846         struct neigh_parms *p = n->parms;
847         return (n->nud_state & NUD_PROBE) ?
848                 p->ucast_probes :
849                 p->ucast_probes + p->app_probes + p->mcast_probes;
850 }
851
/*
 * Handle a failed resolution: count it, stamp neigh->updated, and report
 * every queued packet through ops->error_report().
 *
 * Called with neigh->lock write-held.  The lock is dropped around each
 * error_report() call and re-taken afterwards, so it is held again on
 * return.  Whatever is left in the queue once the loop stops is purged.
 */
static void neigh_invalidate(struct neighbour *neigh)
        __releases(neigh->lock)
        __acquires(neigh->lock)
{
        struct sk_buff *skb;

        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
        neigh->updated = jiffies;

        /* It is very thin place. report_unreachable is very complicated
           routine. Particularly, it can hit the same neighbour entry!

           So that, we try to be accurate and avoid dead loop. --ANK
         */
        /* The state is re-checked each round because it may change while
         * the lock is dropped.
         */
        while (neigh->nud_state == NUD_FAILED &&
               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                write_unlock(&neigh->lock);
                neigh->ops->error_report(neigh, skb);
                write_lock(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);
        neigh->arp_queue_len_bytes = 0;
}
876
877 static void neigh_probe(struct neighbour *neigh)
878         __releases(neigh->lock)
879 {
880         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
881         /* keep skb alive even if arp_queue overflows */
882         if (skb)
883                 skb = skb_copy(skb, GFP_ATOMIC);
884         write_unlock(&neigh->lock);
885         neigh->ops->solicit(neigh, skb);
886         atomic_inc(&neigh->probes);
887         kfree_skb(skb);
888 }
889
890 /* Called when a timer expires for a neighbour entry.
 *
 * @arg is the struct neighbour pointer cast to unsigned long.
 *
 * Under neigh->lock the handler advances the NUD state machine:
 *   REACHABLE        -> stays REACHABLE, or becomes DELAY or STALE
 *   DELAY            -> REACHABLE or PROBE
 *   INCOMPLETE/PROBE -> FAILED once neigh_max_probes() probes were sent
 * It then re-arms the timer while the state is still one of NUD_IN_TIMER,
 * sends a probe for INCOMPLETE/PROBE entries, emits a notification when
 * one was flagged, and finally drops one reference on the entry.
 */
891
892 static void neigh_timer_handler(unsigned long arg)
893 {
894         unsigned long now, next;
895         struct neighbour *neigh = (struct neighbour *)arg;
896         unsigned int state;
897         int notify = 0;
898
899         write_lock(&neigh->lock);
900
901         state = neigh->nud_state;
902         now = jiffies;
903         next = now + HZ;
904
            /* Not in a timer-driven state: just unlock and drop the reference. */
905         if (!(state & NUD_IN_TIMER))
906                 goto out;
907
908         if (state & NUD_REACHABLE) {
909                 if (time_before_eq(now,
910                                    neigh->confirmed + neigh->parms->reachable_time)) {
911                         NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
912                         next = neigh->confirmed + neigh->parms->reachable_time;
913                 } else if (time_before_eq(now,
914                                           neigh->used + neigh->parms->delay_probe_time)) {
915                         NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
916                         neigh->nud_state = NUD_DELAY;
917                         neigh->updated = jiffies;
918                         neigh_suspect(neigh);
919                         next = now + neigh->parms->delay_probe_time;
920                 } else {
921                         NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
922                         neigh->nud_state = NUD_STALE;
923                         neigh->updated = jiffies;
924                         neigh_suspect(neigh);
925                         notify = 1;
926                 }
927         } else if (state & NUD_DELAY) {
928                 if (time_before_eq(now,
929                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
930                         NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
931                         neigh->nud_state = NUD_REACHABLE;
932                         neigh->updated = jiffies;
933                         neigh_connect(neigh);
934                         notify = 1;
935                         next = neigh->confirmed + neigh->parms->reachable_time;
936                 } else {
937                         NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
938                         neigh->nud_state = NUD_PROBE;
939                         neigh->updated = jiffies;
                            /* Start a fresh probe count for the PROBE phase. */
940                         atomic_set(&neigh->probes, 0);
941                         next = now + neigh->parms->retrans_time;
942                 }
943         } else {
944                 /* NUD_PROBE|NUD_INCOMPLETE */
945                 next = now + neigh->parms->retrans_time;
946         }
947
            /* Probe budget used up: mark the entry failed and flush its queue. */
948         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
949             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
950                 neigh->nud_state = NUD_FAILED;
951                 notify = 1;
952                 neigh_invalidate(neigh);
953         }
954
            /*
             * Still timer-driven: re-arm, but never sooner than HZ/2 from now.
             * mod_timer() returning 0 means the timer was not pending, so a
             * reference is taken for the newly armed timer.
             */
955         if (neigh->nud_state & NUD_IN_TIMER) {
956                 if (time_before(next, jiffies + HZ/2))
957                         next = jiffies + HZ/2;
958                 if (!mod_timer(&neigh->timer, next))
959                         neigh_hold(neigh);
960         }
            /* neigh_probe() releases neigh->lock itself; otherwise unlock here. */
961         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
962                 neigh_probe(neigh);
963         } else {
964 out:
965                 write_unlock(&neigh->lock);
966         }
967
968         if (notify)
969                 neigh_update_notify(neigh);
970
            /* Presumably the reference that was held for the timer that just fired. */
971         neigh_release(neigh);
972 }
973
/*
 * Drive address resolution for @neigh when a packet (@skb, may be NULL)
 * wants to use it.
 *
 * Returns 0 when the skb was left untouched: the entry is in one of the
 * NUD_CONNECTED states, in DELAY or PROBE, or was STALE and has just been
 * moved to DELAY.
 * Returns 1 when the entry is INCOMPLETE (a non-NULL skb is then queued on
 * neigh->arp_queue), or when resolution cannot be started, in which case
 * the entry is marked FAILED and the skb is freed.
 */
974 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
975 {
976         int rc;
977         bool immediate_probe = false;
978
979         write_lock_bh(&neigh->lock);
980
981         rc = 0;
982         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
983                 goto out_unlock_bh;
984
985         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                    /* Neither STALE nor INCOMPLETE: resolution has to start from scratch. */
986                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
987                         unsigned long next, now = jiffies;
988
                            /*
                             * The probe counter starts at ucast_probes rather than 0
                             * (presumably so the unicast share of neigh_max_probes()
                             * counts as already spent -- confirm).
                             */
989                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
990                         neigh->nud_state     = NUD_INCOMPLETE;
991                         neigh->updated = now;
992                         next = now + max(neigh->parms->retrans_time, HZ/2);
993                         neigh_add_timer(neigh, next);
994                         immediate_probe = true;
995                 } else {
                            /* No multicast or application probes configured: fail at once. */
996                         neigh->nud_state = NUD_FAILED;
997                         neigh->updated = jiffies;
998                         write_unlock_bh(&neigh->lock);
999
1000                         kfree_skb(skb);
1001                         return 1;
1002                 }
1003         } else if (neigh->nud_state & NUD_STALE) {
1004                 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1005                 neigh->nud_state = NUD_DELAY;
1006                 neigh->updated = jiffies;
1007                 neigh_add_timer(neigh,
1008                                 jiffies + neigh->parms->delay_probe_time);
1009         }
1010
1011         if (neigh->nud_state == NUD_INCOMPLETE) {
1012                 if (skb) {
                             /*
                              * Make room by dropping the oldest queued packets until
                              * this one fits within queue_len_bytes or the queue is
                              * empty.
                              */
1013                         while (neigh->arp_queue_len_bytes + skb->truesize >
1014                                neigh->parms->queue_len_bytes) {
1015                                 struct sk_buff *buff;
1016
1017                                 buff = __skb_dequeue(&neigh->arp_queue);
1018                                 if (!buff)
1019                                         break;
1020                                 neigh->arp_queue_len_bytes -= buff->truesize;
1021                                 kfree_skb(buff);
1022                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1023                         }
1024                         skb_dst_force(skb);
1025                         __skb_queue_tail(&neigh->arp_queue, skb);
1026                         neigh->arp_queue_len_bytes += skb->truesize;
1027                 }
1028                 rc = 1;
1029         }
1030 out_unlock_bh:
             /*
              * neigh_probe() releases neigh->lock with a plain write_unlock(), as
              * does the else branch, so bottom halves are re-enabled separately.
              */
1031         if (immediate_probe)
1032                 neigh_probe(neigh);
1033         else
1034                 write_unlock(&neigh->lock);
1035         local_bh_enable();
1036         return rc;
1037 }
1038 EXPORT_SYMBOL(__neigh_event_send);
1039
1040 static void neigh_update_hhs(struct neighbour *neigh)
1041 {
1042         struct hh_cache *hh;
1043         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1044                 = NULL;
1045
1046         if (neigh->dev->header_ops)
1047                 update = neigh->dev->header_ops->cache_update;
1048
1049         if (update) {
1050                 hh = &neigh->hh;
1051                 if (hh->hh_len) {
1052                         write_seqlock_bh(&hh->hh_lock);
1053                         update(hh, neigh->dev, neigh->ha);
1054                         write_sequnlock_bh(&hh->hh_lock);
1055                 }
1056         }
1057 }
1058
1059
1060
1061 /* Generic update routine.
1062    -- lladdr is new lladdr or NULL, if it is not supplied.
1063    -- new    is new state.
1064    -- flags
1065         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1066                                 if it is different.
1067         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1068                                 lladdr instead of overriding it
1069                                 if it is different.
1070                                 It also allows to retain current state
1071                                 if lladdr is unchanged.
1072         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1073
1074         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1075                                 NTF_ROUTER flag.
1076         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1077                                 a router.
1078
1079    Caller MUST hold reference count on the entry.

   Returns 0 on success (including the case where a differing lladdr is
   ignored because no override flag allows replacing it), -EPERM when a
   non-administrative update targets a NUD_NOARP or NUD_PERMANENT entry,
   and -EINVAL when no lladdr is supplied, the device uses link-layer
   addresses, and the entry holds no valid address yet.

   Takes neigh->lock (write, BH disabled) itself; the lock is dropped
   temporarily while queued packets are sent out.
1080  */
1081
1082 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1083                  u32 flags)
1084 {
1085         u8 old;
1086         int err;
1087         int notify = 0;
1088         struct net_device *dev;
1089         int update_isrouter = 0;
1090
1091         write_lock_bh(&neigh->lock);
1092
1093         dev    = neigh->dev;
1094         old    = neigh->nud_state;
1095         err    = -EPERM;
1096
1097         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1098             (old & (NUD_NOARP | NUD_PERMANENT)))
1099                 goto out;
1100
             /*
              * The new state is not a valid one: stop the timer and record the
              * state; no address handling is needed.
              */
1101         if (!(new & NUD_VALID)) {
1102                 neigh_del_timer(neigh);
1103                 if (old & NUD_CONNECTED)
1104                         neigh_suspect(neigh);
1105                 neigh->nud_state = new;
1106                 err = 0;
1107                 notify = old & NUD_VALID;
1108                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1109                     (new & NUD_FAILED)) {
1110                         neigh_invalidate(neigh);
1111                         notify = 1;
1112                 }
1113                 goto out;
1114         }
1115
             /*
              * From here on "lladdr == neigh->ha" (pointer equality) is used as
              * the marker for "address unchanged".
              */
1116         /* Compare new lladdr with cached one */
1117         if (!dev->addr_len) {
1118                 /* First case: device needs no address. */
1119                 lladdr = neigh->ha;
1120         } else if (lladdr) {
1121                 /* The second case: if something is already cached
1122                    and a new address is proposed:
1123                    - compare new & old
1124                    - if they are different, check override flag
1125                  */
1126                 if ((old & NUD_VALID) &&
1127                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1128                         lladdr = neigh->ha;
1129         } else {
1130                 /* No address is supplied; if we know something,
1131                    use it, otherwise discard the request.
1132                  */
1133                 err = -EINVAL;
1134                 if (!(old & NUD_VALID))
1135                         goto out;
1136                 lladdr = neigh->ha;
1137         }
1138
1139         if (new & NUD_CONNECTED)
1140                 neigh->confirmed = jiffies;
1141         neigh->updated = jiffies;
1142
1143         /* If entry was valid and address is not changed,
1144            do not change entry state, if new one is STALE.
1145          */
1146         err = 0;
1147         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1148         if (old & NUD_VALID) {
1149                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                             /*
                              * Different address but no permission to replace it:
                              * either just suspect the current one (WEAK_OVERRIDE on
                              * a connected entry) or ignore the update altogether.
                              */
1150                         update_isrouter = 0;
1151                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1152                             (old & NUD_CONNECTED)) {
1153                                 lladdr = neigh->ha;
1154                                 new = NUD_STALE;
1155                         } else
1156                                 goto out;
1157                 } else {
1158                         if (lladdr == neigh->ha && new == NUD_STALE &&
1159                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1160                              (old & NUD_CONNECTED))
1161                             )
1162                                 new = old;
1163                 }
1164         }
1165
             /*
              * State change: restart the timer if the new state uses one.  Only
              * REACHABLE gets reachable_time; other timed states expire at once.
              */
1166         if (new != old) {
1167                 neigh_del_timer(neigh);
1168                 if (new & NUD_IN_TIMER)
1169                         neigh_add_timer(neigh, (jiffies +
1170                                                 ((new & NUD_REACHABLE) ?
1171                                                  neigh->parms->reachable_time :
1172                                                  0)));
1173                 neigh->nud_state = new;
1174         }
1175
             /* The address really changes: store it and refresh the cached header. */
1176         if (lladdr != neigh->ha) {
1177                 write_seqlock(&neigh->ha_lock);
1178                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1179                 write_sequnlock(&neigh->ha_lock);
1180                 neigh_update_hhs(neigh);
                     /* Not connected: push "confirmed" two base_reachable_time into the past. */
1181                 if (!(new & NUD_CONNECTED))
1182                         neigh->confirmed = jiffies -
1183                                       (neigh->parms->base_reachable_time << 1);
1184                 notify = 1;
1185         }
1186         if (new == old)
1187                 goto out;
1188         if (new & NUD_CONNECTED)
1189                 neigh_connect(neigh);
1190         else
1191                 neigh_suspect(neigh);
             /* The entry has just become valid: send out what was queued meanwhile. */
1192         if (!(old & NUD_VALID)) {
1193                 struct sk_buff *skb;
1194
1195                 /* Again: avoid dead loop if something went wrong */
1196
1197                 while (neigh->nud_state & NUD_VALID &&
1198                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1199                         struct dst_entry *dst = skb_dst(skb);
1200                         struct neighbour *n2, *n1 = neigh;
                             /* The lock is not held across the output call. */
1201                         write_unlock_bh(&neigh->lock);
1202
1203                         rcu_read_lock();
1204
1205                         /* Why not just use 'neigh' as-is?  The problem is that
1206                          * things such as shaper, eql, and sch_teql can end up
1207                          * using alternative, different, neigh objects to output
1208                          * the packet in the output path.  So what we need to do
1209                          * here is re-lookup the top-level neigh in the path so
1210                          * we can reinject the packet there.
1211                          */
1212                         n2 = NULL;
1213                         if (dst) {
1214                                 n2 = dst_neigh_lookup_skb(dst, skb);
1215                                 if (n2)
1216                                         n1 = n2;
1217                         }
1218                         n1->output(n1, skb);
1219                         if (n2)
1220                                 neigh_release(n2);
1221                         rcu_read_unlock();
1222
1223                         write_lock_bh(&neigh->lock);
1224                 }
1225                 skb_queue_purge(&neigh->arp_queue);
1226                 neigh->arp_queue_len_bytes = 0;
1227         }
1228 out:
             /* Reached on every path, error paths included; update_isrouter is 0 unless requested. */
1229         if (update_isrouter) {
1230                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1231                         (neigh->flags | NTF_ROUTER) :
1232                         (neigh->flags & ~NTF_ROUTER);
1233         }
1234         write_unlock_bh(&neigh->lock);
1235
1236         if (notify)
1237                 neigh_update_notify(neigh);
1238
1239         return err;
1240 }
1241 EXPORT_SYMBOL(neigh_update);
1242
1243 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1244                                  u8 *lladdr, void *saddr,
1245                                  struct net_device *dev)
1246 {
1247         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1248                                                  lladdr || !dev->addr_len);
1249         if (neigh)
1250                 neigh_update(neigh, lladdr, NUD_STALE,
1251                              NEIGH_UPDATE_F_OVERRIDE);
1252         return neigh;
1253 }
1254 EXPORT_SYMBOL(neigh_event_ns);
1255
1256 /* called with read_lock_bh(&n->lock); */
1257 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1258 {
1259         struct net_device *dev = dst->dev;
1260         __be16 prot = dst->ops->protocol;
1261         struct hh_cache *hh = &n->hh;
1262
1263         write_lock_bh(&n->lock);
1264
1265         /* Only one thread can come in here and initialize the
1266          * hh_cache entry.
1267          */
1268         if (!hh->hh_len)
1269                 dev->header_ops->cache(n, hh, prot);
1270
1271         write_unlock_bh(&n->lock);
1272 }
1273
1274 /* This function can be used in contexts, where only old dev_queue_xmit
1275  * worked, f.e. if you want to override normal output path (eql, shaper),
1276  * but resolution is not made yet.
1277  */
1278
1279 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1280 {
1281         struct net_device *dev = skb->dev;
1282
1283         __skb_pull(skb, skb_network_offset(skb));
1284
1285         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1286                             skb->len) < 0 &&
1287             dev->header_ops->rebuild(skb))
1288                 return 0;
1289
1290         return dev_queue_xmit(skb);
1291 }
1292 EXPORT_SYMBOL(neigh_compat_output);
1293
1294 /* Slow and careful. */
1295
1296 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1297 {
1298         struct dst_entry *dst = skb_dst(skb);
1299         int rc = 0;
1300
1301         if (!dst)
1302                 goto discard;
1303
1304         if (!neigh_event_send(neigh, skb)) {
1305                 int err;
1306                 struct net_device *dev = neigh->dev;
1307                 unsigned int seq;
1308
1309                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1310                         neigh_hh_init(neigh, dst);
1311
1312                 do {
1313                         __skb_pull(skb, skb_network_offset(skb));
1314                         seq = read_seqbegin(&neigh->ha_lock);
1315                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1316                                               neigh->ha, NULL, skb->len);
1317                 } while (read_seqretry(&neigh->ha_lock, seq));
1318
1319                 if (err >= 0)
1320                         rc = dev_queue_xmit(skb);
1321                 else
1322                         goto out_kfree_skb;
1323         }
1324 out:
1325         return rc;
1326 discard:
1327         NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1328                       dst, neigh);
1329 out_kfree_skb:
1330         rc = -EINVAL;
1331         kfree_skb(skb);
1332         goto out;
1333 }
1334 EXPORT_SYMBOL(neigh_resolve_output);
1335
1336 /* As fast as possible without hh cache */
1337
1338 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1339 {
1340         struct net_device *dev = neigh->dev;
1341         unsigned int seq;
1342         int err;
1343
1344         do {
1345                 __skb_pull(skb, skb_network_offset(skb));
1346                 seq = read_seqbegin(&neigh->ha_lock);
1347                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1348                                       neigh->ha, NULL, skb->len);
1349         } while (read_seqretry(&neigh->ha_lock, seq));
1350
1351         if (err >= 0)
1352                 err = dev_queue_xmit(skb);
1353         else {
1354                 err = -EINVAL;
1355                 kfree_skb(skb);
1356         }
1357         return err;
1358 }
1359 EXPORT_SYMBOL(neigh_connected_output);
1360
/*
 * Output routine that adds no link-layer header: the skb is passed to
 * dev_queue_xmit() as it is and that call's result is returned.  @neigh is
 * not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc = dev_queue_xmit(skb);

        return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1366
1367 static void neigh_proxy_process(unsigned long arg)
1368 {
1369         struct neigh_table *tbl = (struct neigh_table *)arg;
1370         long sched_next = 0;
1371         unsigned long now = jiffies;
1372         struct sk_buff *skb, *n;
1373
1374         spin_lock(&tbl->proxy_queue.lock);
1375
1376         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1377                 long tdif = NEIGH_CB(skb)->sched_next - now;
1378
1379                 if (tdif <= 0) {
1380                         struct net_device *dev = skb->dev;
1381
1382                         __skb_unlink(skb, &tbl->proxy_queue);
1383                         if (tbl->proxy_redo && netif_running(dev)) {
1384                                 rcu_read_lock();
1385                                 tbl->proxy_redo(skb);
1386                                 rcu_read_unlock();
1387                         } else {
1388                                 kfree_skb(skb);
1389                         }
1390
1391                         dev_put(dev);
1392                 } else if (!sched_next || tdif < sched_next)
1393                         sched_next = tdif;
1394         }
1395         del_timer(&tbl->proxy_timer);
1396         if (sched_next)
1397                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1398         spin_unlock(&tbl->proxy_queue.lock);
1399 }
1400
1401 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1402                     struct sk_buff *skb)
1403 {
1404         unsigned long now = jiffies;
1405         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1406
1407         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1408                 kfree_skb(skb);
1409                 return;
1410         }
1411
1412         NEIGH_CB(skb)->sched_next = sched_next;
1413         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1414
1415         spin_lock(&tbl->proxy_queue.lock);
1416         if (del_timer(&tbl->proxy_timer)) {
1417                 if (time_before(tbl->proxy_timer.expires, sched_next))
1418                         sched_next = tbl->proxy_timer.expires;
1419         }
1420         skb_dst_drop(skb);
1421         dev_hold(skb->dev);
1422         __skb_queue_tail(&tbl->proxy_queue, skb);
1423         mod_timer(&tbl->proxy_timer, sched_next);
1424         spin_unlock(&tbl->proxy_queue.lock);
1425 }
1426 EXPORT_SYMBOL(pneigh_enqueue);
1427
1428 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1429                                                       struct net *net, int ifindex)
1430 {
1431         struct neigh_parms *p;
1432
1433         for (p = &tbl->parms; p; p = p->next) {
1434                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1435                     (!p->dev && !ifindex))
1436                         return p;
1437         }
1438
1439         return NULL;
1440 }
1441
1442 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1443                                       struct neigh_table *tbl)
1444 {
1445         struct neigh_parms *p, *ref;
1446         struct net *net = dev_net(dev);
1447         const struct net_device_ops *ops = dev->netdev_ops;
1448
1449         ref = lookup_neigh_parms(tbl, net, 0);
1450         if (!ref)
1451                 return NULL;
1452
1453         p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1454         if (p) {
1455                 p->tbl            = tbl;
1456                 atomic_set(&p->refcnt, 1);
1457                 p->reachable_time =
1458                                 neigh_rand_reach_time(p->base_reachable_time);
1459
1460                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1461                         kfree(p);
1462                         return NULL;
1463                 }
1464
1465                 dev_hold(dev);
1466                 p->dev = dev;
1467                 write_pnet(&p->net, hold_net(net));
1468                 p->sysctl_table = NULL;
1469                 write_lock_bh(&tbl->lock);
1470                 p->next         = tbl->parms.next;
1471                 tbl->parms.next = p;
1472                 write_unlock_bh(&tbl->lock);
1473         }
1474         return p;
1475 }
1476 EXPORT_SYMBOL(neigh_parms_alloc);
1477
1478 static void neigh_rcu_free_parms(struct rcu_head *head)
1479 {
1480         struct neigh_parms *parms =
1481                 container_of(head, struct neigh_parms, rcu_head);
1482
1483         neigh_parms_put(parms);
1484 }
1485
1486 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1487 {
1488         struct neigh_parms **p;
1489
1490         if (!parms || parms == &tbl->parms)
1491                 return;
1492         write_lock_bh(&tbl->lock);
1493         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1494                 if (*p == parms) {
1495                         *p = parms->next;
1496                         parms->dead = 1;
1497                         write_unlock_bh(&tbl->lock);
1498                         if (parms->dev)
1499                                 dev_put(parms->dev);
1500                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1501                         return;
1502                 }
1503         }
1504         write_unlock_bh(&tbl->lock);
1505         NEIGH_PRINTK1("neigh_parms_release: not found\n");
1506 }
1507 EXPORT_SYMBOL(neigh_parms_release);
1508
/*
 * Final teardown of @parms: release its namespace reference and free the
 * structure.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1514
1515 static struct lock_class_key neigh_table_proxy_queue_class;
1516
1517 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1518 {
1519         unsigned long now = jiffies;
1520         unsigned long phsize;
1521
1522         write_pnet(&tbl->parms.net, &init_net);
1523         atomic_set(&tbl->parms.refcnt, 1);
1524         tbl->parms.reachable_time =
1525                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1526
1527         tbl->stats = alloc_percpu(struct neigh_statistics);
1528         if (!tbl->stats)
1529                 panic("cannot create neighbour cache statistics");
1530
1531 #ifdef CONFIG_PROC_FS
1532         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1533                               &neigh_stat_seq_fops, tbl))
1534                 panic("cannot create neighbour proc dir entry");
1535 #endif
1536
1537         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1538
1539         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1540         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1541
1542         if (!tbl->nht || !tbl->phash_buckets)
1543                 panic("cannot allocate neighbour cache hashes");
1544
1545         rwlock_init(&tbl->lock);
1546         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1547         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1548         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1549         skb_queue_head_init_class(&tbl->proxy_queue,
1550                         &neigh_table_proxy_queue_class);
1551
1552         tbl->last_flush = now;
1553         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1554 }
1555
1556 void neigh_table_init(struct neigh_table *tbl)
1557 {
1558         struct neigh_table *tmp;
1559
1560         neigh_table_init_no_netlink(tbl);
1561         write_lock(&neigh_tbl_lock);
1562         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1563                 if (tmp->family == tbl->family)
1564                         break;
1565         }
1566         tbl->next       = neigh_tables;
1567         neigh_tables    = tbl;
1568         write_unlock(&neigh_tbl_lock);
1569
1570         if (unlikely(tmp)) {
1571                 pr_err("Registering multiple tables for family %d\n",
1572                        tbl->family);
1573                 dump_stack();
1574         }
1575 }
1576 EXPORT_SYMBOL(neigh_table_init);
1577
1578 int neigh_table_clear(struct neigh_table *tbl)
1579 {
1580         struct neigh_table **tp;
1581
1582         /* It is not clean... Fix it to unload IPv6 module safely */
1583         cancel_delayed_work_sync(&tbl->gc_work);
1584         del_timer_sync(&tbl->proxy_timer);
1585         pneigh_queue_purge(&tbl->proxy_queue);
1586         neigh_ifdown(tbl, NULL);
1587         if (atomic_read(&tbl->entries))
1588                 pr_crit("neighbour leakage\n");
1589         write_lock(&neigh_tbl_lock);
1590         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1591                 if (*tp == tbl) {
1592                         *tp = tbl->next;
1593                         break;
1594                 }
1595         }
1596         write_unlock(&neigh_tbl_lock);
1597
1598         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1599                  neigh_hash_free_rcu);
1600         tbl->nht = NULL;
1601
1602         kfree(tbl->phash_buckets);
1603         tbl->phash_buckets = NULL;
1604
1605         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1606
1607         free_percpu(tbl->stats);
1608         tbl->stats = NULL;
1609
1610         return 0;
1611 }
1612 EXPORT_SYMBOL(neigh_table_clear);
1613
1614 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1615 {
1616         struct net *net = sock_net(skb->sk);
1617         struct ndmsg *ndm;
1618         struct nlattr *dst_attr;
1619         struct neigh_table *tbl;
1620         struct net_device *dev = NULL;
1621         int err = -EINVAL;
1622
1623         ASSERT_RTNL();
1624         if (nlmsg_len(nlh) < sizeof(*ndm))
1625                 goto out;
1626
1627         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1628         if (dst_attr == NULL)
1629                 goto out;
1630
1631         ndm = nlmsg_data(nlh);
1632         if (ndm->ndm_ifindex) {
1633                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1634                 if (dev == NULL) {
1635                         err = -ENODEV;
1636                         goto out;
1637                 }
1638         }
1639
1640         read_lock(&neigh_tbl_lock);
1641         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1642                 struct neighbour *neigh;
1643
1644                 if (tbl->family != ndm->ndm_family)
1645                         continue;
1646                 read_unlock(&neigh_tbl_lock);
1647
1648                 if (nla_len(dst_attr) < tbl->key_len)
1649                         goto out;
1650
1651                 if (ndm->ndm_flags & NTF_PROXY) {
1652                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1653                         goto out;
1654                 }
1655
1656                 if (dev == NULL)
1657                         goto out;
1658
1659                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1660                 if (neigh == NULL) {
1661                         err = -ENOENT;
1662                         goto out;
1663                 }
1664
1665                 err = neigh_update(neigh, NULL, NUD_FAILED,
1666                                    NEIGH_UPDATE_F_OVERRIDE |
1667                                    NEIGH_UPDATE_F_ADMIN);
1668                 neigh_release(neigh);
1669                 goto out;
1670         }
1671         read_unlock(&neigh_tbl_lock);
1672         err = -EAFNOSUPPORT;
1673
1674 out:
1675         return err;
1676 }
1677
1678 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1679 {
1680         struct net *net = sock_net(skb->sk);
1681         struct ndmsg *ndm;
1682         struct nlattr *tb[NDA_MAX+1];
1683         struct neigh_table *tbl;
1684         struct net_device *dev = NULL;
1685         int err;
1686
1687         ASSERT_RTNL();
1688         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1689         if (err < 0)
1690                 goto out;
1691
1692         err = -EINVAL;
1693         if (tb[NDA_DST] == NULL)
1694                 goto out;
1695
1696         ndm = nlmsg_data(nlh);
1697         if (ndm->ndm_ifindex) {
1698                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1699                 if (dev == NULL) {
1700                         err = -ENODEV;
1701                         goto out;
1702                 }
1703
1704                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1705                         goto out;
1706         }
1707
1708         read_lock(&neigh_tbl_lock);
1709         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1710                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1711                 struct neighbour *neigh;
1712                 void *dst, *lladdr;
1713
1714                 if (tbl->family != ndm->ndm_family)
1715                         continue;
1716                 read_unlock(&neigh_tbl_lock);
1717
1718                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1719                         goto out;
1720                 dst = nla_data(tb[NDA_DST]);
1721                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1722
1723                 if (ndm->ndm_flags & NTF_PROXY) {
1724                         struct pneigh_entry *pn;
1725
1726                         err = -ENOBUFS;
1727                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1728                         if (pn) {
1729                                 pn->flags = ndm->ndm_flags;
1730                                 err = 0;
1731                         }
1732                         goto out;
1733                 }
1734
1735                 if (dev == NULL)
1736                         goto out;
1737
1738                 neigh = neigh_lookup(tbl, dst, dev);
1739                 if (neigh == NULL) {
1740                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1741                                 err = -ENOENT;
1742                                 goto out;
1743                         }
1744
1745                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1746                         if (IS_ERR(neigh)) {
1747                                 err = PTR_ERR(neigh);
1748                                 goto out;
1749                         }
1750                 } else {
1751                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1752                                 err = -EEXIST;
1753                                 neigh_release(neigh);
1754                                 goto out;
1755                         }
1756
1757                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1758                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1759                 }
1760
1761                 if (ndm->ndm_flags & NTF_USE) {
1762                         neigh_event_send(neigh, NULL);
1763                         err = 0;
1764                 } else
1765                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1766                 neigh_release(neigh);
1767                 goto out;
1768         }
1769
1770         read_unlock(&neigh_tbl_lock);
1771         err = -EAFNOSUPPORT;
1772 out:
1773         return err;
1774 }
1775
1776 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1777 {
1778         struct nlattr *nest;
1779
1780         nest = nla_nest_start(skb, NDTA_PARMS);
1781         if (nest == NULL)
1782                 return -ENOBUFS;
1783
1784         if ((parms->dev &&
1785              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1786             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1787             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1788             /* approximative value for deprecated QUEUE_LEN (in packets) */
1789             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1790                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1791             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1792             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1793             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1794             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1795             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1796             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1797                           parms->base_reachable_time) ||
1798             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1799             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1800                           parms->delay_probe_time) ||
1801             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1802             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1803             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1804             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1805                 goto nla_put_failure;
1806         return nla_nest_end(skb, nest);
1807
1808 nla_put_failure:
1809         nla_nest_cancel(skb, nest);
1810         return -EMSGSIZE;
1811 }
1812
1813 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1814                               u32 pid, u32 seq, int type, int flags)
1815 {
1816         struct nlmsghdr *nlh;
1817         struct ndtmsg *ndtmsg;
1818
1819         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1820         if (nlh == NULL)
1821                 return -EMSGSIZE;
1822
1823         ndtmsg = nlmsg_data(nlh);
1824
1825         read_lock_bh(&tbl->lock);
1826         ndtmsg->ndtm_family = tbl->family;
1827         ndtmsg->ndtm_pad1   = 0;
1828         ndtmsg->ndtm_pad2   = 0;
1829
1830         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1831             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1832             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1833             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1834             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1835                 goto nla_put_failure;
1836         {
1837                 unsigned long now = jiffies;
1838                 unsigned int flush_delta = now - tbl->last_flush;
1839                 unsigned int rand_delta = now - tbl->last_rand;
1840                 struct neigh_hash_table *nht;
1841                 struct ndt_config ndc = {
1842                         .ndtc_key_len           = tbl->key_len,
1843                         .ndtc_entry_size        = tbl->entry_size,
1844                         .ndtc_entries           = atomic_read(&tbl->entries),
1845                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1846                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1847                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1848                 };
1849
1850                 rcu_read_lock_bh();
1851                 nht = rcu_dereference_bh(tbl->nht);
1852                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1853                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1854                 rcu_read_unlock_bh();
1855
1856                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1857                         goto nla_put_failure;
1858         }
1859
1860         {
1861                 int cpu;
1862                 struct ndt_stats ndst;
1863
1864                 memset(&ndst, 0, sizeof(ndst));
1865
1866                 for_each_possible_cpu(cpu) {
1867                         struct neigh_statistics *st;
1868
1869                         st = per_cpu_ptr(tbl->stats, cpu);
1870                         ndst.ndts_allocs                += st->allocs;
1871                         ndst.ndts_destroys              += st->destroys;
1872                         ndst.ndts_hash_grows            += st->hash_grows;
1873                         ndst.ndts_res_failed            += st->res_failed;
1874                         ndst.ndts_lookups               += st->lookups;
1875                         ndst.ndts_hits                  += st->hits;
1876                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1877                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1878                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1879                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1880                 }
1881
1882                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1883                         goto nla_put_failure;
1884         }
1885
1886         BUG_ON(tbl->parms.dev);
1887         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1888                 goto nla_put_failure;
1889
1890         read_unlock_bh(&tbl->lock);
1891         return nlmsg_end(skb, nlh);
1892
1893 nla_put_failure:
1894         read_unlock_bh(&tbl->lock);
1895         nlmsg_cancel(skb, nlh);
1896         return -EMSGSIZE;
1897 }
1898
1899 static int neightbl_fill_param_info(struct sk_buff *skb,
1900                                     struct neigh_table *tbl,
1901                                     struct neigh_parms *parms,
1902                                     u32 pid, u32 seq, int type,
1903                                     unsigned int flags)
1904 {
1905         struct ndtmsg *ndtmsg;
1906         struct nlmsghdr *nlh;
1907
1908         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1909         if (nlh == NULL)
1910                 return -EMSGSIZE;
1911
1912         ndtmsg = nlmsg_data(nlh);
1913
1914         read_lock_bh(&tbl->lock);
1915         ndtmsg->ndtm_family = tbl->family;
1916         ndtmsg->ndtm_pad1   = 0;
1917         ndtmsg->ndtm_pad2   = 0;
1918
1919         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1920             neightbl_fill_parms(skb, parms) < 0)
1921                 goto errout;
1922
1923         read_unlock_bh(&tbl->lock);
1924         return nlmsg_end(skb, nlh);
1925 errout:
1926         read_unlock_bh(&tbl->lock);
1927         nlmsg_cancel(skb, nlh);
1928         return -EMSGSIZE;
1929 }
1930
1931 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1932         [NDTA_NAME]             = { .type = NLA_STRING },
1933         [NDTA_THRESH1]          = { .type = NLA_U32 },
1934         [NDTA_THRESH2]          = { .type = NLA_U32 },
1935         [NDTA_THRESH3]          = { .type = NLA_U32 },
1936         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1937         [NDTA_PARMS]            = { .type = NLA_NESTED },
1938 };
1939
1940 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1941         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1942         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1943         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1944         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1945         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1946         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1947         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1948         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1949         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1950         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1951         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1952         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1953         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1954 };
1955
1956 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1957 {
1958         struct net *net = sock_net(skb->sk);
1959         struct neigh_table *tbl;
1960         struct ndtmsg *ndtmsg;
1961         struct nlattr *tb[NDTA_MAX+1];
1962         int err;
1963
1964         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1965                           nl_neightbl_policy);
1966         if (err < 0)
1967                 goto errout;
1968
1969         if (tb[NDTA_NAME] == NULL) {
1970                 err = -EINVAL;
1971                 goto errout;
1972         }
1973
1974         ndtmsg = nlmsg_data(nlh);
1975         read_lock(&neigh_tbl_lock);
1976         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1977                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1978                         continue;
1979
1980                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1981                         break;
1982         }
1983
1984         if (tbl == NULL) {
1985                 err = -ENOENT;
1986                 goto errout_locked;
1987         }
1988
1989         /*
1990          * We acquire tbl->lock to be nice to the periodic timers and
1991          * make sure they always see a consistent set of values.
1992          */
1993         write_lock_bh(&tbl->lock);
1994
1995         if (tb[NDTA_PARMS]) {
1996                 struct nlattr *tbp[NDTPA_MAX+1];
1997                 struct neigh_parms *p;
1998                 int i, ifindex = 0;
1999
2000                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2001                                        nl_ntbl_parm_policy);
2002                 if (err < 0)
2003                         goto errout_tbl_lock;
2004
2005                 if (tbp[NDTPA_IFINDEX])
2006                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2007
2008                 p = lookup_neigh_parms(tbl, net, ifindex);
2009                 if (p == NULL) {
2010                         err = -ENOENT;
2011                         goto errout_tbl_lock;
2012                 }
2013
2014                 for (i = 1; i <= NDTPA_MAX; i++) {
2015                         if (tbp[i] == NULL)
2016                                 continue;
2017
2018                         switch (i) {
2019                         case NDTPA_QUEUE_LEN:
2020                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2021                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2022                                 break;
2023                         case NDTPA_QUEUE_LENBYTES:
2024                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2025                                 break;
2026                         case NDTPA_PROXY_QLEN:
2027                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2028                                 break;
2029                         case NDTPA_APP_PROBES:
2030                                 p->app_probes = nla_get_u32(tbp[i]);
2031                                 break;
2032                         case NDTPA_UCAST_PROBES:
2033                                 p->ucast_probes = nla_get_u32(tbp[i]);
2034                                 break;
2035                         case NDTPA_MCAST_PROBES:
2036                                 p->mcast_probes = nla_get_u32(tbp[i]);
2037                                 break;
2038                         case NDTPA_BASE_REACHABLE_TIME:
2039                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2040                                 break;
2041                         case NDTPA_GC_STALETIME:
2042                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2043                                 break;
2044                         case NDTPA_DELAY_PROBE_TIME:
2045                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2046                                 break;
2047                         case NDTPA_RETRANS_TIME:
2048                                 p->retrans_time = nla_get_msecs(tbp[i]);
2049                                 break;
2050                         case NDTPA_ANYCAST_DELAY:
2051                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2052                                 break;
2053                         case NDTPA_PROXY_DELAY:
2054                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2055                                 break;
2056                         case NDTPA_LOCKTIME:
2057                                 p->locktime = nla_get_msecs(tbp[i]);
2058                                 break;
2059                         }
2060                 }
2061         }
2062
2063         if (tb[NDTA_THRESH1])
2064                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2065
2066         if (tb[NDTA_THRESH2])
2067                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2068
2069         if (tb[NDTA_THRESH3])
2070                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2071
2072         if (tb[NDTA_GC_INTERVAL])
2073                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2074
2075         err = 0;
2076
2077 errout_tbl_lock:
2078         write_unlock_bh(&tbl->lock);
2079 errout_locked:
2080         read_unlock(&neigh_tbl_lock);
2081 errout:
2082         return err;
2083 }
2084
2085 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2086 {
2087         struct net *net = sock_net(skb->sk);
2088         int family, tidx, nidx = 0;
2089         int tbl_skip = cb->args[0];
2090         int neigh_skip = cb->args[1];
2091         struct neigh_table *tbl;
2092
2093         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2094
2095         read_lock(&neigh_tbl_lock);
2096         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2097                 struct neigh_parms *p;
2098
2099                 if (tidx < tbl_skip || (family && tbl->family != family))
2100                         continue;
2101
2102                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2103                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2104                                        NLM_F_MULTI) <= 0)
2105                         break;
2106
2107                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2108                         if (!net_eq(neigh_parms_net(p), net))
2109                                 continue;
2110
2111                         if (nidx < neigh_skip)
2112                                 goto next;
2113
2114                         if (neightbl_fill_param_info(skb, tbl, p,
2115                                                      NETLINK_CB(cb->skb).portid,
2116                                                      cb->nlh->nlmsg_seq,
2117                                                      RTM_NEWNEIGHTBL,
2118                                                      NLM_F_MULTI) <= 0)
2119                                 goto out;
2120                 next:
2121                         nidx++;
2122                 }
2123
2124                 neigh_skip = 0;
2125         }
2126 out:
2127         read_unlock(&neigh_tbl_lock);
2128         cb->args[0] = tidx;
2129         cb->args[1] = nidx;
2130
2131         return skb->len;
2132 }
2133
2134 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2135                            u32 pid, u32 seq, int type, unsigned int flags)
2136 {
2137         unsigned long now = jiffies;
2138         struct nda_cacheinfo ci;
2139         struct nlmsghdr *nlh;
2140         struct ndmsg *ndm;
2141
2142         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2143         if (nlh == NULL)
2144                 return -EMSGSIZE;
2145
2146         ndm = nlmsg_data(nlh);
2147         ndm->ndm_family  = neigh->ops->family;
2148         ndm->ndm_pad1    = 0;
2149         ndm->ndm_pad2    = 0;
2150         ndm->ndm_flags   = neigh->flags;
2151         ndm->ndm_type    = neigh->type;
2152         ndm->ndm_ifindex = neigh->dev->ifindex;
2153
2154         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2155                 goto nla_put_failure;
2156
2157         read_lock_bh(&neigh->lock);
2158         ndm->ndm_state   = neigh->nud_state;
2159         if (neigh->nud_state & NUD_VALID) {
2160                 char haddr[MAX_ADDR_LEN];
2161
2162                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164                         read_unlock_bh(&neigh->lock);
2165                         goto nla_put_failure;
2166                 }
2167         }
2168
2169         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2170         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2171         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2172         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2173         read_unlock_bh(&neigh->lock);
2174
2175         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2176             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2177                 goto nla_put_failure;
2178
2179         return nlmsg_end(skb, nlh);
2180
2181 nla_put_failure:
2182         nlmsg_cancel(skb, nlh);
2183         return -EMSGSIZE;
2184 }
2185
2186 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2187                             u32 pid, u32 seq, int type, unsigned int flags,
2188                             struct neigh_table *tbl)
2189 {
2190         struct nlmsghdr *nlh;
2191         struct ndmsg *ndm;
2192
2193         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2194         if (nlh == NULL)
2195                 return -EMSGSIZE;
2196
2197         ndm = nlmsg_data(nlh);
2198         ndm->ndm_family  = tbl->family;
2199         ndm->ndm_pad1    = 0;
2200         ndm->ndm_pad2    = 0;
2201         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2202         ndm->ndm_type    = NDA_DST;
2203         ndm->ndm_ifindex = pn->dev->ifindex;
2204         ndm->ndm_state   = NUD_NONE;
2205
2206         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2207                 goto nla_put_failure;
2208
2209         return nlmsg_end(skb, nlh);
2210
2211 nla_put_failure:
2212         nlmsg_cancel(skb, nlh);
2213         return -EMSGSIZE;
2214 }
2215
/* Announce an updated neighbour: first to the in-kernel netevent notifier chain (NETEVENT_NEIGH_UPDATE), then via __neigh_notify() as RTM_NEWNEIGH. */
2216 static void neigh_update_notify(struct neighbour *neigh)
2217 {
2218         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2219         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2220 }
2221
2222 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2223                             struct netlink_callback *cb)
2224 {
2225         struct net *net = sock_net(skb->sk);
2226         struct neighbour *n;
2227         int rc, h, s_h = cb->args[1];
2228         int idx, s_idx = idx = cb->args[2];
2229         struct neigh_hash_table *nht;
2230
2231         rcu_read_lock_bh();
2232         nht = rcu_dereference_bh(tbl->nht);
2233
2234         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2235                 if (h > s_h)
2236                         s_idx = 0;
2237                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2238                      n != NULL;
2239                      n = rcu_dereference_bh(n->next)) {
2240                         if (!net_eq(dev_net(n->dev), net))
2241                                 continue;
2242                         if (idx < s_idx)
2243                                 goto next;
2244                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2245                                             cb->nlh->nlmsg_seq,
2246                                             RTM_NEWNEIGH,
2247                                             NLM_F_MULTI) <= 0) {
2248                                 rc = -1;
2249                                 goto out;
2250                         }
2251 next:
2252                         idx++;
2253                 }
2254         }
2255         rc = skb->len;
2256 out:
2257         rcu_read_unlock_bh();
2258         cb->args[1] = h;
2259         cb->args[2] = idx;
2260         return rc;
2261 }
2262
2263 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2264                              struct netlink_callback *cb)
2265 {
2266         struct pneigh_entry *n;
2267         struct net *net = sock_net(skb->sk);
2268         int rc, h, s_h = cb->args[3];
2269         int idx, s_idx = idx = cb->args[4];
2270
2271         read_lock_bh(&tbl->lock);
2272
2273         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2274                 if (h > s_h)
2275                         s_idx = 0;
2276                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2277                         if (dev_net(n->dev) != net)
2278                                 continue;
2279                         if (idx < s_idx)
2280                                 goto next;
2281                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2282                                             cb->nlh->nlmsg_seq,
2283                                             RTM_NEWNEIGH,
2284                                             NLM_F_MULTI, tbl) <= 0) {
2285                                 read_unlock_bh(&tbl->lock);
2286                                 rc = -1;
2287                                 goto out;
2288                         }
2289                 next:
2290                         idx++;
2291                 }
2292         }
2293
2294         read_unlock_bh(&tbl->lock);
2295         rc = skb->len;
2296 out:
2297         cb->args[3] = h;
2298         cb->args[4] = idx;
2299         return rc;
2300
2301 }
2302
2303 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2304 {
2305         struct neigh_table *tbl;
2306         int t, family, s_t;
2307         int proxy = 0;
2308         int err;
2309
2310         read_lock(&neigh_tbl_lock);
2311         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2312
2313         /* check for full ndmsg structure presence, family member is
2314          * the same for both structures
2315          */
2316         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2317             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2318                 proxy = 1;
2319
2320         s_t = cb->args[0];
2321
2322         for (tbl = neigh_tables, t = 0; tbl;
2323              tbl = tbl->next, t++) {
2324                 if (t < s_t || (family && tbl->family != family))
2325                         continue;
2326                 if (t > s_t)
2327                         memset(&cb->args[1], 0, sizeof(cb->args) -
2328                                                 sizeof(cb->args[0]));
2329                 if (proxy)
2330                         err = pneigh_dump_table(tbl, skb, cb);
2331                 else
2332                         err = neigh_dump_table(tbl, skb, cb);
2333                 if (err < 0)
2334                         break;
2335         }
2336         read_unlock(&neigh_tbl_lock);
2337
2338         cb->args[0] = t;
2339         return skb->len;
2340 }
2341
2342 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2343 {
2344         int chain;
2345         struct neigh_hash_table *nht;
2346
2347         rcu_read_lock_bh();
2348         nht = rcu_dereference_bh(tbl->nht);
2349
2350         read_lock(&tbl->lock); /* avoid resizes */
2351         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2352                 struct neighbour *n;
2353
2354                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2355                      n != NULL;
2356                      n = rcu_dereference_bh(n->next))
2357                         cb(n, cookie);
2358         }
2359         read_unlock(&tbl->lock);
2360         rcu_read_unlock_bh();
2361 }
2362 EXPORT_SYMBOL(neigh_for_each);
2363
2364 /* The tbl->lock must be held as a writer and BH disabled. */
2365 void __neigh_for_each_release(struct neigh_table *tbl,
2366                               int (*cb)(struct neighbour *))
2367 {
2368         int chain;
2369         struct neigh_hash_table *nht;
2370
2371         nht = rcu_dereference_protected(tbl->nht,
2372                                         lockdep_is_held(&tbl->lock));
2373         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2374                 struct neighbour *n;
2375                 struct neighbour __rcu **np;
2376
2377                 np = &nht->hash_buckets[chain];
2378                 while ((n = rcu_dereference_protected(*np,
2379                                         lockdep_is_held(&tbl->lock))) != NULL) {
2380                         int release;
2381
2382                         write_lock(&n->lock);
2383                         release = cb(n);
2384                         if (release) {
2385                                 rcu_assign_pointer(*np,
2386                                         rcu_dereference_protected(n->next,
2387                                                 lockdep_is_held(&tbl->lock)));
2388                                 n->dead = 1;
2389                         } else
2390                                 np = &n->next;
2391                         write_unlock(&n->lock);
2392                         if (release)
2393                                 neigh_cleanup_and_release(n);
2394                 }
2395         }
2396 }
2397 EXPORT_SYMBOL(__neigh_for_each_release);
2398
2399 #ifdef CONFIG_PROC_FS
2400
2401 static struct neighbour *neigh_get_first(struct seq_file *seq)
2402 {
2403         struct neigh_seq_state *state = seq->private;
2404         struct net *net = seq_file_net(seq);
2405         struct neigh_hash_table *nht = state->nht;
2406         struct neighbour *n = NULL;
2407         int bucket = state->bucket;
2408
2409         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2410         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2411                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2412
2413                 while (n) {
2414                         if (!net_eq(dev_net(n->dev), net))
2415                                 goto next;
2416                         if (state->neigh_sub_iter) {
2417                                 loff_t fakep = 0;
2418                                 void *v;
2419
2420                                 v = state->neigh_sub_iter(state, n, &fakep);
2421                                 if (!v)
2422                                         goto next;
2423                         }
2424                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2425                                 break;
2426                         if (n->nud_state & ~NUD_NOARP)
2427                                 break;
2428 next:
2429                         n = rcu_dereference_bh(n->next);
2430                 }
2431
2432                 if (n)
2433                         break;
2434         }
2435         state->bucket = bucket;
2436
2437         return n;
2438 }
2439
2440 static struct neighbour *neigh_get_next(struct seq_file *seq,
2441                                         struct neighbour *n,
2442                                         loff_t *pos)
2443 {
2444         struct neigh_seq_state *state = seq->private;
2445         struct net *net = seq_file_net(seq);
2446         struct neigh_hash_table *nht = state->nht;
2447
2448         if (state->neigh_sub_iter) {
2449                 void *v = state->neigh_sub_iter(state, n, pos);
2450                 if (v)
2451                         return n;
2452         }
2453         n = rcu_dereference_bh(n->next);
2454
2455         while (1) {
2456                 while (n) {
2457                         if (!net_eq(dev_net(n->dev), net))
2458                                 goto next;
2459                         if (state->neigh_sub_iter) {
2460                                 void *v = state->neigh_sub_iter(state, n, pos);
2461                                 if (v)
2462                                         return n;
2463                                 goto next;
2464                         }
2465                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2466                                 break;
2467
2468                         if (n->nud_state & ~NUD_NOARP)
2469                                 break;
2470 next:
2471                         n = rcu_dereference_bh(n->next);
2472                 }
2473
2474                 if (n)
2475                         break;
2476
2477                 if (++state->bucket >= (1 << nht->hash_shift))
2478                         break;
2479
2480                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2481         }
2482
2483         if (n && pos)
2484                 --(*pos);
2485         return n;
2486 }
2487
2488 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2489 {
2490         struct neighbour *n = neigh_get_first(seq);
2491
2492         if (n) {
2493                 --(*pos);
2494                 while (*pos) {
2495                         n = neigh_get_next(seq, n, pos);
2496                         if (!n)
2497                                 break;
2498                 }
2499         }
2500         return *pos ? NULL : n;
2501 }
2502
2503 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2504 {
2505         struct neigh_seq_state *state = seq->private;
2506         struct net *net = seq_file_net(seq);
2507         struct neigh_table *tbl = state->tbl;
2508         struct pneigh_entry *pn = NULL;
2509         int bucket = state->bucket;
2510
2511         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2512         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2513                 pn = tbl->phash_buckets[bucket];
2514                 while (pn && !net_eq(pneigh_net(pn), net))
2515                         pn = pn->next;
2516                 if (pn)
2517                         break;
2518         }
2519         state->bucket = bucket;
2520
2521         return pn;
2522 }
2523
2524 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2525                                             struct pneigh_entry *pn,
2526                                             loff_t *pos)
2527 {
2528         struct neigh_seq_state *state = seq->private;
2529         struct net *net = seq_file_net(seq);
2530         struct neigh_table *tbl = state->tbl;
2531
2532         do {
2533                 pn = pn->next;
2534         } while (pn && !net_eq(pneigh_net(pn), net));
2535
2536         while (!pn) {
2537                 if (++state->bucket > PNEIGH_HASHMASK)
2538                         break;
2539                 pn = tbl->phash_buckets[state->bucket];
2540                 while (pn && !net_eq(pneigh_net(pn), net))
2541                         pn = pn->next;
2542                 if (pn)
2543                         break;
2544         }
2545
2546         if (pn && pos)
2547                 --(*pos);
2548
2549         return pn;
2550 }
2551
2552 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2553 {
2554         struct pneigh_entry *pn = pneigh_get_first(seq);
2555
2556         if (pn) {
2557                 --(*pos);
2558                 while (*pos) {
2559                         pn = pneigh_get_next(seq, pn, pos);
2560                         if (!pn)
2561                                 break;
2562                 }
2563         }
2564         return *pos ? NULL : pn;
2565 }
2566
2567 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2568 {
2569         struct neigh_seq_state *state = seq->private;
2570         void *rc;
2571         loff_t idxpos = *pos;
2572
2573         rc = neigh_get_idx(seq, &idxpos);
2574         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2575                 rc = pneigh_get_idx(seq, &idxpos);
2576
2577         return rc;
2578 }
2579
2580 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2581         __acquires(rcu_bh)
2582 {
2583         struct neigh_seq_state *state = seq->private;
2584
2585         state->tbl = tbl;
2586         state->bucket = 0;
2587         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2588
2589         rcu_read_lock_bh();
2590         state->nht = rcu_dereference_bh(tbl->nht);
2591
2592         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2593 }
2594 EXPORT_SYMBOL(neigh_seq_start);
2595
2596 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2597 {
2598         struct neigh_seq_state *state;
2599         void *rc;
2600
2601         if (v == SEQ_START_TOKEN) {
2602                 rc = neigh_get_first(seq);
2603                 goto out;
2604         }
2605
2606         state = seq->private;
2607         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2608                 rc = neigh_get_next(seq, v, NULL);
2609                 if (rc)
2610                         goto out;
2611                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2612                         rc = pneigh_get_first(seq);
2613         } else {
2614                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2615                 rc = pneigh_get_next(seq, v, NULL);
2616         }
2617 out:
2618         ++(*pos);
2619         return rc;
2620 }
2621 EXPORT_SYMBOL(neigh_seq_next);
2622
2623 void neigh_seq_stop(struct seq_file *seq, void *v)
2624         __releases(rcu_bh)
2625 {
2626         rcu_read_unlock_bh();
2627 }
2628 EXPORT_SYMBOL(neigh_seq_stop);
2629
2630 /* statistics via seq_file */
2631
2632 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2633 {
2634         struct neigh_table *tbl = seq->private;
2635         int cpu;
2636
2637         if (*pos == 0)
2638                 return SEQ_START_TOKEN;
2639
2640         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2641                 if (!cpu_possible(cpu))
2642                         continue;
2643                 *pos = cpu+1;
2644                 return per_cpu_ptr(tbl->stats, cpu);
2645         }
2646         return NULL;
2647 }
2648
2649 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2650 {
2651         struct neigh_table *tbl = seq->private;
2652         int cpu;
2653
2654         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2655                 if (!cpu_possible(cpu))
2656                         continue;
2657                 *pos = cpu+1;
2658                 return per_cpu_ptr(tbl->stats, cpu);
2659         }
2660         return NULL;
2661 }
2662
/* ->stop for the statistics file: ->start takes nothing, so nothing to undo. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2667
2668 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2669 {
2670         struct neigh_table *tbl = seq->private;
2671         struct neigh_statistics *st = v;
2672
2673         if (v == SEQ_START_TOKEN) {
2674                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2675                 return 0;
2676         }
2677
2678         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2679                         "%08lx %08lx  %08lx %08lx %08lx\n",
2680                    atomic_read(&tbl->entries),
2681
2682                    st->allocs,
2683                    st->destroys,
2684                    st->hash_grows,
2685
2686                    st->lookups,
2687                    st->hits,
2688
2689                    st->res_failed,
2690
2691                    st->rcv_probes_mcast,
2692                    st->rcv_probes_ucast,
2693
2694                    st->periodic_gc_runs,
2695                    st->forced_gc_runs,
2696                    st->unres_discards
2697                    );
2698
2699         return 0;
2700 }
2701
2702 static const struct seq_operations neigh_stat_seq_ops = {
2703         .start  = neigh_stat_seq_start,
2704         .next   = neigh_stat_seq_next,
2705         .stop   = neigh_stat_seq_stop,
2706         .show   = neigh_stat_seq_show,
2707 };
2708
2709 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2710 {
2711         int ret = seq_open(file, &neigh_stat_seq_ops);
2712
2713         if (!ret) {
2714                 struct seq_file *sf = file->private_data;
2715                 sf->private = PDE(inode)->data;
2716         }
2717         return ret;
2718 };
2719
2720 static const struct file_operations neigh_stat_seq_fops = {
2721         .owner   = THIS_MODULE,
2722         .open    = neigh_stat_seq_open,
2723         .read    = seq_read,
2724         .llseek  = seq_lseek,
2725         .release = seq_release,
2726 };
2727
2728 #endif /* CONFIG_PROC_FS */
2729
2730 static inline size_t neigh_nlmsg_size(void)
2731 {
2732         return NLMSG_ALIGN(sizeof(struct ndmsg))
2733                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2734                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2735                + nla_total_size(sizeof(struct nda_cacheinfo))
2736                + nla_total_size(4); /* NDA_PROBES */
2737 }
2738
2739 static void __neigh_notify(struct neighbour *n, int type, int flags)
2740 {
2741         struct net *net = dev_net(n->dev);
2742         struct sk_buff *skb;
2743         int err = -ENOBUFS;
2744
2745         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2746         if (skb == NULL)
2747                 goto errout;
2748
2749         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2750         if (err < 0) {
2751                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2752                 WARN_ON(err == -EMSGSIZE);
2753                 kfree_skb(skb);
2754                 goto errout;
2755         }
2756         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2757         return;
2758 errout:
2759         if (err < 0)
2760                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2761 }
2762
#ifdef CONFIG_ARPD
/*
 * Send an RTM_GETNEIGH message flagged NLM_F_REQUEST for neighbour @n to
 * RTNLGRP_NEIGH listeners.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2770
2771 #ifdef CONFIG_SYSCTL
2772 static int zero;
2773 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2774
2775 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2776                            size_t *lenp, loff_t *ppos)
2777 {
2778         int size, ret;
2779         ctl_table tmp = *ctl;
2780
2781         tmp.extra1 = &zero;
2782         tmp.extra2 = &unres_qlen_max;
2783         tmp.data = &size;
2784
2785         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2786         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2787
2788         if (write && !ret)
2789                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2790         return ret;
2791 }
2792
/*
 * Slot numbers into neigh_sysctl_table.neigh_vars[].  The order matters:
 * neigh_sysctl_register() cuts the table off at NEIGH_VAR_GC_INTERVAL for
 * per-device registrations, so the gc_* entries must stay last.
 */
enum {
	NEIGH_VAR_MCAST_PROBE = 0,		/* mcast_solicit */
	NEIGH_VAR_UCAST_PROBE,			/* ucast_solicit */
	NEIGH_VAR_APP_PROBE,			/* app_solicit */
	NEIGH_VAR_RETRANS_TIME,			/* retrans_time */
	NEIGH_VAR_BASE_REACHABLE_TIME,		/* base_reachable_time */
	NEIGH_VAR_DELAY_PROBE_TIME,		/* delay_first_probe_time */
	NEIGH_VAR_GC_STALETIME,			/* gc_stale_time */
	NEIGH_VAR_QUEUE_LEN,			/* unres_qlen */
	NEIGH_VAR_QUEUE_LEN_BYTES,		/* unres_qlen_bytes */
	NEIGH_VAR_PROXY_QLEN,			/* proxy_qlen */
	NEIGH_VAR_ANYCAST_DELAY,		/* anycast_delay */
	NEIGH_VAR_PROXY_DELAY,			/* proxy_delay */
	NEIGH_VAR_LOCKTIME,			/* locktime */
	NEIGH_VAR_RETRANS_TIME_MS,		/* retrans_time_ms */
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,	/* base_reachable_time_ms */
	NEIGH_VAR_GC_INTERVAL,			/* gc_interval */
	NEIGH_VAR_GC_THRESH1,			/* gc_thresh1 */
	NEIGH_VAR_GC_THRESH2,			/* gc_thresh2 */
	NEIGH_VAR_GC_THRESH3,			/* gc_thresh3 */
	NEIGH_VAR_MAX
};
2815
2816 static struct neigh_sysctl_table {
2817         struct ctl_table_header *sysctl_header;
2818         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2819 } neigh_sysctl_template __read_mostly = {
2820         .neigh_vars = {
2821                 [NEIGH_VAR_MCAST_PROBE] = {
2822                         .procname       = "mcast_solicit",
2823                         .maxlen         = sizeof(int),
2824                         .mode           = 0644,
2825                         .proc_handler   = proc_dointvec,
2826                 },
2827                 [NEIGH_VAR_UCAST_PROBE] = {
2828                         .procname       = "ucast_solicit",
2829                         .maxlen         = sizeof(int),
2830                         .mode           = 0644,
2831                         .proc_handler   = proc_dointvec,
2832                 },
2833                 [NEIGH_VAR_APP_PROBE] = {
2834                         .procname       = "app_solicit",
2835                         .maxlen         = sizeof(int),
2836                         .mode           = 0644,
2837                         .proc_handler   = proc_dointvec,
2838                 },
2839                 [NEIGH_VAR_RETRANS_TIME] = {
2840                         .procname       = "retrans_time",
2841                         .maxlen         = sizeof(int),
2842                         .mode           = 0644,
2843                         .proc_handler   = proc_dointvec_userhz_jiffies,
2844                 },
2845                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2846                         .procname       = "base_reachable_time",
2847                         .maxlen         = sizeof(int),
2848                         .mode           = 0644,
2849                         .proc_handler   = proc_dointvec_jiffies,
2850                 },
2851                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2852                         .procname       = "delay_first_probe_time",
2853                         .maxlen         = sizeof(int),
2854                         .mode           = 0644,
2855                         .proc_handler   = proc_dointvec_jiffies,
2856                 },
2857                 [NEIGH_VAR_GC_STALETIME] = {
2858                         .procname       = "gc_stale_time",
2859                         .maxlen         = sizeof(int),
2860                         .mode           = 0644,
2861                         .proc_handler   = proc_dointvec_jiffies,
2862                 },
2863                 [NEIGH_VAR_QUEUE_LEN] = {
2864                         .procname       = "unres_qlen",
2865                         .maxlen         = sizeof(int),
2866                         .mode           = 0644,
2867                         .proc_handler   = proc_unres_qlen,
2868                 },
2869                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2870                         .procname       = "unres_qlen_bytes",
2871                         .maxlen         = sizeof(int),
2872                         .mode           = 0644,
2873                         .extra1         = &zero,
2874                         .proc_handler   = proc_dointvec_minmax,
2875                 },
2876                 [NEIGH_VAR_PROXY_QLEN] = {
2877                         .procname       = "proxy_qlen",
2878                         .maxlen         = sizeof(int),
2879                         .mode           = 0644,
2880                         .proc_handler   = proc_dointvec,
2881                 },
2882                 [NEIGH_VAR_ANYCAST_DELAY] = {
2883                         .procname       = "anycast_delay",
2884                         .maxlen         = sizeof(int),
2885                         .mode           = 0644,
2886                         .proc_handler   = proc_dointvec_userhz_jiffies,
2887                 },
2888                 [NEIGH_VAR_PROXY_DELAY] = {
2889                         .procname       = "proxy_delay",
2890                         .maxlen         = sizeof(int),
2891                         .mode           = 0644,
2892                         .proc_handler   = proc_dointvec_userhz_jiffies,
2893                 },
2894                 [NEIGH_VAR_LOCKTIME] = {
2895                         .procname       = "locktime",
2896                         .maxlen         = sizeof(int),
2897                         .mode           = 0644,
2898                         .proc_handler   = proc_dointvec_userhz_jiffies,
2899                 },
2900                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2901                         .procname       = "retrans_time_ms",
2902                         .maxlen         = sizeof(int),
2903                         .mode           = 0644,
2904                         .proc_handler   = proc_dointvec_ms_jiffies,
2905                 },
2906                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2907                         .procname       = "base_reachable_time_ms",
2908                         .maxlen         = sizeof(int),
2909                         .mode           = 0644,
2910                         .proc_handler   = proc_dointvec_ms_jiffies,
2911                 },
2912                 [NEIGH_VAR_GC_INTERVAL] = {
2913                         .procname       = "gc_interval",
2914                         .maxlen         = sizeof(int),
2915                         .mode           = 0644,
2916                         .proc_handler   = proc_dointvec_jiffies,
2917                 },
2918                 [NEIGH_VAR_GC_THRESH1] = {
2919                         .procname       = "gc_thresh1",
2920                         .maxlen         = sizeof(int),
2921                         .mode           = 0644,
2922                         .proc_handler   = proc_dointvec,
2923                 },
2924                 [NEIGH_VAR_GC_THRESH2] = {
2925                         .procname       = "gc_thresh2",
2926                         .maxlen         = sizeof(int),
2927                         .mode           = 0644,
2928                         .proc_handler   = proc_dointvec,
2929                 },
2930                 [NEIGH_VAR_GC_THRESH3] = {
2931                         .procname       = "gc_thresh3",
2932                         .maxlen         = sizeof(int),
2933                         .mode           = 0644,
2934                         .proc_handler   = proc_dointvec,
2935                 },
2936                 {},
2937         },
2938 };
2939
2940 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2941                           char *p_name, proc_handler *handler)
2942 {
2943         struct neigh_sysctl_table *t;
2944         const char *dev_name_source = NULL;
2945         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2946
2947         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2948         if (!t)
2949                 goto err;
2950
2951         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2952         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2953         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2954         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2955         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2956         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2957         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2958         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2959         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2960         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2961         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2962         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2963         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2964         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2965         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2966
2967         if (dev) {
2968                 dev_name_source = dev->name;
2969                 /* Terminate the table early */
2970                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2971                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2972         } else {
2973                 dev_name_source = "default";
2974                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2975                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2976                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2977                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2978         }
2979
2980
2981         if (handler) {
2982                 /* RetransTime */
2983                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2984                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2985                 /* ReachableTime */
2986                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2987                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2988                 /* RetransTime (in milliseconds)*/
2989                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2990                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2991                 /* ReachableTime (in milliseconds) */
2992                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2993                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2994         }
2995
2996         /* Don't export sysctls to unprivileged users */
2997         if (neigh_parms_net(p)->user_ns != &init_user_ns)
2998                 t->neigh_vars[0].procname = NULL;
2999
3000         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3001                 p_name, dev_name_source);
3002         t->sysctl_header =
3003                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3004         if (!t->sysctl_header)
3005                 goto free;
3006
3007         p->sysctl_table = t;
3008         return 0;
3009
3010 free:
3011         kfree(t);
3012 err:
3013         return -ENOBUFS;
3014 }
3015 EXPORT_SYMBOL(neigh_sysctl_register);
3016
3017 void neigh_sysctl_unregister(struct neigh_parms *p)
3018 {
3019         if (p->sysctl_table) {
3020                 struct neigh_sysctl_table *t = p->sysctl_table;
3021                 p->sysctl_table = NULL;
3022                 unregister_net_sysctl_table(t->sysctl_header);
3023                 kfree(t);
3024         }
3025 }
3026 EXPORT_SYMBOL(neigh_sysctl_unregister);
3027
3028 #endif  /* CONFIG_SYSCTL */
3029
3030 static int __init neigh_init(void)
3031 {
3032         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3033         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3034         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3035
3036         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3037                       NULL);
3038         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3039
3040         return 0;
3041 }
3042
3043 subsys_initcall(neigh_init);
3044