]> rtime.felk.cvut.cz Git - linux-imx.git/blob - net/ipv4/devinet.c
Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty...
[linux-imx.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/* Read devconf attribute @attr from the per-namespace default table of @net.
 * @net is parenthesised so that an argument which is itself an expression
 * (e.g. a conditional or pointer arithmetic) still binds as one operand.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
98 };
99
100 #define IN4_ADDR_HSIZE_SHIFT    8
101 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
102
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145                 if (ifa->ifa_local == addr) {
146                         struct net_device *dev = ifa->ifa_dev->dev;
147
148                         if (!net_eq(dev_net(dev), net))
149                                 continue;
150                         result = dev;
151                         break;
152                 }
153         }
154         if (!result) {
155                 struct flowi4 fl4 = { .daddr = addr };
156                 struct fib_result res = { 0 };
157                 struct fib_table *local;
158
159                 /* Fallback to FIB local table so that communication
160                  * over loopback subnets work.
161                  */
162                 local = fib_get_table(net, RT_TABLE_LOCAL);
163                 if (local &&
164                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165                     res.type == RTN_LOCAL)
166                         result = FIB_RES_DEV(res);
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179                          int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191
192 /* Locks all the inet devices. */
193
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202         if (ifa->ifa_dev)
203                 in_dev_put(ifa->ifa_dev);
204         kfree(ifa);
205 }
206
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214         struct net_device *dev = idev->dev;
215
216         WARN_ON(idev->ifa_list);
217         WARN_ON(idev->mc_list);
218         kfree(rcu_dereference_protected(idev->mc_hash, 1));
219 #ifdef NET_REFCNT_DEBUG
220         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222         dev_put(dev);
223         if (!idev->dead)
224                 pr_err("Freeing alive in_device %p\n", idev);
225         else
226                 kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232         struct in_device *in_dev;
233
234         ASSERT_RTNL();
235
236         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237         if (!in_dev)
238                 goto out;
239         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240                         sizeof(in_dev->cnf));
241         in_dev->cnf.sysctl = NULL;
242         in_dev->dev = dev;
243         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244         if (!in_dev->arp_parms)
245                 goto out_kfree;
246         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247                 dev_disable_lro(dev);
248         /* Reference in_dev->dev */
249         dev_hold(dev);
250         /* Account for reference dev->ip_ptr (below) */
251         in_dev_hold(in_dev);
252
253         devinet_sysctl_register(in_dev);
254         ip_mc_init_dev(in_dev);
255         if (dev->flags & IFF_UP)
256                 ip_mc_up(in_dev);
257
258         /* we can receive as soon as ip_ptr is set -- do this last */
259         rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261         return in_dev;
262 out_kfree:
263         kfree(in_dev);
264         in_dev = NULL;
265         goto out;
266 }
267
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270         struct in_device *idev = container_of(head, struct in_device, rcu_head);
271         in_dev_put(idev);
272 }
273
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276         struct in_ifaddr *ifa;
277         struct net_device *dev;
278
279         ASSERT_RTNL();
280
281         dev = in_dev->dev;
282
283         in_dev->dead = 1;
284
285         ip_mc_destroy_dev(in_dev);
286
287         while ((ifa = in_dev->ifa_list) != NULL) {
288                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289                 inet_free_ifa(ifa);
290         }
291
292         RCU_INIT_POINTER(dev->ip_ptr, NULL);
293
294         devinet_sysctl_unregister(in_dev);
295         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296         arp_ifdown(dev);
297
298         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303         rcu_read_lock();
304         for_primary_ifa(in_dev) {
305                 if (inet_ifa_match(a, ifa)) {
306                         if (!b || inet_ifa_match(b, ifa)) {
307                                 rcu_read_unlock();
308                                 return 1;
309                         }
310                 }
311         } endfor_ifa(in_dev);
312         rcu_read_unlock();
313         return 0;
314 }
315
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317                          int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319         struct in_ifaddr *promote = NULL;
320         struct in_ifaddr *ifa, *ifa1 = *ifap;
321         struct in_ifaddr *last_prim = in_dev->ifa_list;
322         struct in_ifaddr *prev_prom = NULL;
323         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324
325         ASSERT_RTNL();
326
327         /* 1. Deleting primary ifaddr forces deletion all secondaries
328          * unless alias promotion is set
329          **/
330
331         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333
334                 while ((ifa = *ifap1) != NULL) {
335                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336                             ifa1->ifa_scope <= ifa->ifa_scope)
337                                 last_prim = ifa;
338
339                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340                             ifa1->ifa_mask != ifa->ifa_mask ||
341                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
342                                 ifap1 = &ifa->ifa_next;
343                                 prev_prom = ifa;
344                                 continue;
345                         }
346
347                         if (!do_promote) {
348                                 inet_hash_remove(ifa);
349                                 *ifap1 = ifa->ifa_next;
350
351                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352                                 blocking_notifier_call_chain(&inetaddr_chain,
353                                                 NETDEV_DOWN, ifa);
354                                 inet_free_ifa(ifa);
355                         } else {
356                                 promote = ifa;
357                                 break;
358                         }
359                 }
360         }
361
362         /* On promotion all secondaries from subnet are changing
363          * the primary IP, we must remove all their routes silently
364          * and later to add them back with new prefsrc. Do this
365          * while all addresses are on the device list.
366          */
367         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368                 if (ifa1->ifa_mask == ifa->ifa_mask &&
369                     inet_ifa_match(ifa1->ifa_address, ifa))
370                         fib_del_ifaddr(ifa, ifa1);
371         }
372
373         /* 2. Unlink it */
374
375         *ifap = ifa1->ifa_next;
376         inet_hash_remove(ifa1);
377
378         /* 3. Announce address deletion */
379
380         /* Send message first, then call notifier.
381            At first sight, FIB update triggered by notifier
382            will refer to already deleted ifaddr, that could confuse
383            netlink listeners. It is not true: look, gated sees
384            that route deleted and if it still thinks that ifaddr
385            is valid, it will try to restore deleted routes... Grr.
386            So that, this order is correct.
387          */
388         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390
391         if (promote) {
392                 struct in_ifaddr *next_sec = promote->ifa_next;
393
394                 if (prev_prom) {
395                         prev_prom->ifa_next = promote->ifa_next;
396                         promote->ifa_next = last_prim->ifa_next;
397                         last_prim->ifa_next = promote;
398                 }
399
400                 promote->ifa_flags &= ~IFA_F_SECONDARY;
401                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402                 blocking_notifier_call_chain(&inetaddr_chain,
403                                 NETDEV_UP, promote);
404                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405                         if (ifa1->ifa_mask != ifa->ifa_mask ||
406                             !inet_ifa_match(ifa1->ifa_address, ifa))
407                                         continue;
408                         fib_add_ifaddr(ifa);
409                 }
410
411         }
412         if (destroy)
413                 inet_free_ifa(ifa1);
414 }
415
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417                          int destroy)
418 {
419         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421
/* Delayed work item whose handler is check_lifetime(), defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427                              u32 portid)
428 {
429         struct in_device *in_dev = ifa->ifa_dev;
430         struct in_ifaddr *ifa1, **ifap, **last_primary;
431
432         ASSERT_RTNL();
433
434         if (!ifa->ifa_local) {
435                 inet_free_ifa(ifa);
436                 return 0;
437         }
438
439         ifa->ifa_flags &= ~IFA_F_SECONDARY;
440         last_primary = &in_dev->ifa_list;
441
442         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443              ifap = &ifa1->ifa_next) {
444                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445                     ifa->ifa_scope <= ifa1->ifa_scope)
446                         last_primary = &ifa1->ifa_next;
447                 if (ifa1->ifa_mask == ifa->ifa_mask &&
448                     inet_ifa_match(ifa1->ifa_address, ifa)) {
449                         if (ifa1->ifa_local == ifa->ifa_local) {
450                                 inet_free_ifa(ifa);
451                                 return -EEXIST;
452                         }
453                         if (ifa1->ifa_scope != ifa->ifa_scope) {
454                                 inet_free_ifa(ifa);
455                                 return -EINVAL;
456                         }
457                         ifa->ifa_flags |= IFA_F_SECONDARY;
458                 }
459         }
460
461         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462                 net_srandom(ifa->ifa_local);
463                 ifap = last_primary;
464         }
465
466         ifa->ifa_next = *ifap;
467         *ifap = ifa;
468
469         inet_hash_insert(dev_net(in_dev->dev), ifa);
470
471         cancel_delayed_work(&check_lifetime_work);
472         schedule_delayed_work(&check_lifetime_work, 0);
473
474         /* Send message first, then call notifier.
475            Notifier will trigger FIB update, so that
476            listeners of netlink will know about new ifaddr */
477         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479
480         return 0;
481 }
482
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485         return __inet_insert_ifa(ifa, NULL, 0);
486 }
487
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490         struct in_device *in_dev = __in_dev_get_rtnl(dev);
491
492         ASSERT_RTNL();
493
494         if (!in_dev) {
495                 inet_free_ifa(ifa);
496                 return -ENOBUFS;
497         }
498         ipv4_devconf_setall(in_dev);
499         if (ifa->ifa_dev != in_dev) {
500                 WARN_ON(ifa->ifa_dev);
501                 in_dev_hold(in_dev);
502                 ifa->ifa_dev = in_dev;
503         }
504         if (ipv4_is_loopback(ifa->ifa_local))
505                 ifa->ifa_scope = RT_SCOPE_HOST;
506         return inet_insert_ifa(ifa);
507 }
508
509 /* Caller must hold RCU or RTNL :
510  * We dont take a reference on found in_device
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514         struct net_device *dev;
515         struct in_device *in_dev = NULL;
516
517         rcu_read_lock();
518         dev = dev_get_by_index_rcu(net, ifindex);
519         if (dev)
520                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521         rcu_read_unlock();
522         return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525
526 /* Called only from RTNL semaphored context. No locks. */
527
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529                                     __be32 mask)
530 {
531         ASSERT_RTNL();
532
533         for_primary_ifa(in_dev) {
534                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535                         return ifa;
536         } endfor_ifa(in_dev);
537         return NULL;
538 }
539
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 {
542         struct net *net = sock_net(skb->sk);
543         struct nlattr *tb[IFA_MAX+1];
544         struct in_device *in_dev;
545         struct ifaddrmsg *ifm;
546         struct in_ifaddr *ifa, **ifap;
547         int err = -EINVAL;
548
549         ASSERT_RTNL();
550
551         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552         if (err < 0)
553                 goto errout;
554
555         ifm = nlmsg_data(nlh);
556         in_dev = inetdev_by_index(net, ifm->ifa_index);
557         if (in_dev == NULL) {
558                 err = -ENODEV;
559                 goto errout;
560         }
561
562         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563              ifap = &ifa->ifa_next) {
564                 if (tb[IFA_LOCAL] &&
565                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566                         continue;
567
568                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569                         continue;
570
571                 if (tb[IFA_ADDRESS] &&
572                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574                         continue;
575
576                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577                 return 0;
578         }
579
580         err = -EADDRNOTAVAIL;
581 errout:
582         return err;
583 }
584
585 #define INFINITY_LIFE_TIME      0xFFFFFFFF
586
587 static void check_lifetime(struct work_struct *work)
588 {
589         unsigned long now, next, next_sec, next_sched;
590         struct in_ifaddr *ifa;
591         struct hlist_node *n;
592         int i;
593
594         now = jiffies;
595         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596
597         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598                 bool change_needed = false;
599
600                 rcu_read_lock();
601                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
602                         unsigned long age;
603
604                         if (ifa->ifa_flags & IFA_F_PERMANENT)
605                                 continue;
606
607                         /* We try to batch several events at once. */
608                         age = (now - ifa->ifa_tstamp +
609                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
610
611                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612                             age >= ifa->ifa_valid_lft) {
613                                 change_needed = true;
614                         } else if (ifa->ifa_preferred_lft ==
615                                    INFINITY_LIFE_TIME) {
616                                 continue;
617                         } else if (age >= ifa->ifa_preferred_lft) {
618                                 if (time_before(ifa->ifa_tstamp +
619                                                 ifa->ifa_valid_lft * HZ, next))
620                                         next = ifa->ifa_tstamp +
621                                                ifa->ifa_valid_lft * HZ;
622
623                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624                                         change_needed = true;
625                         } else if (time_before(ifa->ifa_tstamp +
626                                                ifa->ifa_preferred_lft * HZ,
627                                                next)) {
628                                 next = ifa->ifa_tstamp +
629                                        ifa->ifa_preferred_lft * HZ;
630                         }
631                 }
632                 rcu_read_unlock();
633                 if (!change_needed)
634                         continue;
635                 rtnl_lock();
636                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
637                         unsigned long age;
638
639                         if (ifa->ifa_flags & IFA_F_PERMANENT)
640                                 continue;
641
642                         /* We try to batch several events at once. */
643                         age = (now - ifa->ifa_tstamp +
644                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645
646                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647                             age >= ifa->ifa_valid_lft) {
648                                 struct in_ifaddr **ifap;
649
650                                 for (ifap = &ifa->ifa_dev->ifa_list;
651                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
652                                         if (*ifap == ifa) {
653                                                 inet_del_ifa(ifa->ifa_dev,
654                                                              ifap, 1);
655                                                 break;
656                                         }
657                                 }
658                         } else if (ifa->ifa_preferred_lft !=
659                                    INFINITY_LIFE_TIME &&
660                                    age >= ifa->ifa_preferred_lft &&
661                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
663                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
664                         }
665                 }
666                 rtnl_unlock();
667         }
668
669         next_sec = round_jiffies_up(next);
670         next_sched = next;
671
672         /* If rounded timeout is accurate enough, accept it. */
673         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674                 next_sched = next_sec;
675
676         now = jiffies;
677         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
680
681         schedule_delayed_work(&check_lifetime_work, next_sched - now);
682 }
683
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
685                              __u32 prefered_lft)
686 {
687         unsigned long timeout;
688
689         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690
691         timeout = addrconf_timeout_fixup(valid_lft, HZ);
692         if (addrconf_finite_timeout(timeout))
693                 ifa->ifa_valid_lft = timeout;
694         else
695                 ifa->ifa_flags |= IFA_F_PERMANENT;
696
697         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698         if (addrconf_finite_timeout(timeout)) {
699                 if (timeout == 0)
700                         ifa->ifa_flags |= IFA_F_DEPRECATED;
701                 ifa->ifa_preferred_lft = timeout;
702         }
703         ifa->ifa_tstamp = jiffies;
704         if (!ifa->ifa_cstamp)
705                 ifa->ifa_cstamp = ifa->ifa_tstamp;
706 }
707
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
710 {
711         struct nlattr *tb[IFA_MAX+1];
712         struct in_ifaddr *ifa;
713         struct ifaddrmsg *ifm;
714         struct net_device *dev;
715         struct in_device *in_dev;
716         int err;
717
718         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
719         if (err < 0)
720                 goto errout;
721
722         ifm = nlmsg_data(nlh);
723         err = -EINVAL;
724         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
725                 goto errout;
726
727         dev = __dev_get_by_index(net, ifm->ifa_index);
728         err = -ENODEV;
729         if (dev == NULL)
730                 goto errout;
731
732         in_dev = __in_dev_get_rtnl(dev);
733         err = -ENOBUFS;
734         if (in_dev == NULL)
735                 goto errout;
736
737         ifa = inet_alloc_ifa();
738         if (ifa == NULL)
739                 /*
740                  * A potential indev allocation can be left alive, it stays
741                  * assigned to its device and is destroy with it.
742                  */
743                 goto errout;
744
745         ipv4_devconf_setall(in_dev);
746         in_dev_hold(in_dev);
747
748         if (tb[IFA_ADDRESS] == NULL)
749                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750
751         INIT_HLIST_NODE(&ifa->hash);
752         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754         ifa->ifa_flags = ifm->ifa_flags;
755         ifa->ifa_scope = ifm->ifa_scope;
756         ifa->ifa_dev = in_dev;
757
758         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760
761         if (tb[IFA_BROADCAST])
762                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
763
764         if (tb[IFA_LABEL])
765                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766         else
767                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768
769         if (tb[IFA_CACHEINFO]) {
770                 struct ifa_cacheinfo *ci;
771
772                 ci = nla_data(tb[IFA_CACHEINFO]);
773                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
774                         err = -EINVAL;
775                         goto errout_free;
776                 }
777                 *pvalid_lft = ci->ifa_valid;
778                 *pprefered_lft = ci->ifa_prefered;
779         }
780
781         return ifa;
782
783 errout_free:
784         inet_free_ifa(ifa);
785 errout:
786         return ERR_PTR(err);
787 }
788
789 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
790 {
791         struct in_device *in_dev = ifa->ifa_dev;
792         struct in_ifaddr *ifa1, **ifap;
793
794         if (!ifa->ifa_local)
795                 return NULL;
796
797         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
798              ifap = &ifa1->ifa_next) {
799                 if (ifa1->ifa_mask == ifa->ifa_mask &&
800                     inet_ifa_match(ifa1->ifa_address, ifa) &&
801                     ifa1->ifa_local == ifa->ifa_local)
802                         return ifa1;
803         }
804         return NULL;
805 }
806
807 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
808 {
809         struct net *net = sock_net(skb->sk);
810         struct in_ifaddr *ifa;
811         struct in_ifaddr *ifa_existing;
812         __u32 valid_lft = INFINITY_LIFE_TIME;
813         __u32 prefered_lft = INFINITY_LIFE_TIME;
814
815         ASSERT_RTNL();
816
817         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
818         if (IS_ERR(ifa))
819                 return PTR_ERR(ifa);
820
821         ifa_existing = find_matching_ifa(ifa);
822         if (!ifa_existing) {
823                 /* It would be best to check for !NLM_F_CREATE here but
824                  * userspace alreay relies on not having to provide this.
825                  */
826                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
827                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
828         } else {
829                 inet_free_ifa(ifa);
830
831                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
832                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
833                         return -EEXIST;
834                 ifa = ifa_existing;
835                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
836                 cancel_delayed_work(&check_lifetime_work);
837                 schedule_delayed_work(&check_lifetime_work, 0);
838                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
839                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
840         }
841         return 0;
842 }
843
844 /*
845  *      Determine a default network mask, based on the IP address.
846  */
847
848 static int inet_abc_len(__be32 addr)
849 {
850         int rc = -1;    /* Something else, probably a multicast. */
851
852         if (ipv4_is_zeronet(addr))
853                 rc = 0;
854         else {
855                 __u32 haddr = ntohl(addr);
856
857                 if (IN_CLASSA(haddr))
858                         rc = 8;
859                 else if (IN_CLASSB(haddr))
860                         rc = 16;
861                 else if (IN_CLASSC(haddr))
862                         rc = 24;
863         }
864
865         return rc;
866 }
867
868
/*
 * devinet_ioctl - handle the per-address IPv4 socket ioctls.
 * @net: network namespace used for device lookup and the capability check
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: userspace pointer to a struct ifreq (read, and written back for
 *       the SIOCG* commands)
 *
 * The ifreq is copied in, its name is NUL-terminated and temporarily cut at
 * the first ':' so that the base device can be loaded and looked up; the
 * ':' is restored before the name is compared against address labels.
 *
 * SIOCG* commands need no privilege. All SIOCS* commands require
 * CAP_NET_ADMIN in net->user_ns, and the address-setting ones additionally
 * require sin_family == AF_INET.
 *
 * Returns 0 on success, or -EFAULT (user copy failed), -EPERM, -EINVAL,
 * -ENODEV (no such device), -EADDRNOTAVAIL (no matching address),
 * -ENOBUFS (allocation failed), or whatever dev_change_flags() /
 * inet_set_ifa() return.
 */
869 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
870 {
871         struct ifreq ifr;
872         struct sockaddr_in sin_orig;
873         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
874         struct in_device *in_dev;
875         struct in_ifaddr **ifap = NULL;
876         struct in_ifaddr *ifa = NULL;
877         struct net_device *dev;
878         char *colon;
879         int ret = -EFAULT;
880         int tryaddrmatch = 0;
881
882         /*
883          *      Fetch the caller's info block into kernel space
884          */
885
886         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
887                 goto out;
            /* force termination: the name comes straight from userspace */
888         ifr.ifr_name[IFNAMSIZ - 1] = 0;
889
890         /* save original address for comparison */
891         memcpy(&sin_orig, sin, sizeof(*sin));
892
            /* strip an alias suffix ("eth0:1" -> "eth0") for the device lookup */
893         colon = strchr(ifr.ifr_name, ':');
894         if (colon)
895                 *colon = 0;
896
897         dev_load(net, ifr.ifr_name);
898
899         switch (cmd) {
900         case SIOCGIFADDR:       /* Get interface address */
901         case SIOCGIFBRDADDR:    /* Get the broadcast address */
902         case SIOCGIFDSTADDR:    /* Get the destination address */
903         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
904                 /* Note that these ioctls will not sleep,
905                    so that we do not impose a lock.
906                    One day we will be forced to put shlock here (I mean SMP)
907                  */
908                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
909                 memset(sin, 0, sizeof(*sin));
910                 sin->sin_family = AF_INET;
911                 break;
912
913         case SIOCSIFFLAGS:
914                 ret = -EPERM;
915                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
916                         goto out;
917                 break;
918         case SIOCSIFADDR:       /* Set interface address (and family) */
919         case SIOCSIFBRDADDR:    /* Set the broadcast address */
920         case SIOCSIFDSTADDR:    /* Set the destination address */
921         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
922                 ret = -EPERM;
923                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
924                         goto out;
925                 ret = -EINVAL;
926                 if (sin->sin_family != AF_INET)
927                         goto out;
928                 break;
929         default:
930                 ret = -EINVAL;
931                 goto out;
932         }
933
934         rtnl_lock();
935
936         ret = -ENODEV;
937         dev = __dev_get_by_name(net, ifr.ifr_name);
938         if (!dev)
939                 goto done;
940
            /* restore the full label so it can be matched against ifa_label */
941         if (colon)
942                 *colon = ':';
943
944         in_dev = __in_dev_get_rtnl(dev);
945         if (in_dev) {
946                 if (tryaddrmatch) {
947                         /* Matthias Andree */
948                         /* compare label and address (4.4BSD style) */
949                         /* note: we only do this for a limited set of ioctls
950                            and only if the original address family was AF_INET.
951                            This is checked above. */
952                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
953                              ifap = &ifa->ifa_next) {
954                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
955                                     sin_orig.sin_addr.s_addr ==
956                                                         ifa->ifa_local) {
957                                         break; /* found */
958                                 }
959                         }
960                 }
961                 /* we didn't get a match, maybe the application is
962                    4.3BSD-style and passed in junk so we fall back to
963                    comparing just the label */
964                 if (!ifa) {
965                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
966                              ifap = &ifa->ifa_next)
967                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
968                                         break;
969                 }
970         }
971
            /* only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an address */
972         ret = -EADDRNOTAVAIL;
973         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
974                 goto done;
975
976         switch (cmd) {
977         case SIOCGIFADDR:       /* Get interface address */
978                 sin->sin_addr.s_addr = ifa->ifa_local;
979                 goto rarok;
980
981         case SIOCGIFBRDADDR:    /* Get the broadcast address */
982                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
983                 goto rarok;
984
985         case SIOCGIFDSTADDR:    /* Get the destination address */
986                 sin->sin_addr.s_addr = ifa->ifa_address;
987                 goto rarok;
988
989         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
990                 sin->sin_addr.s_addr = ifa->ifa_mask;
991                 goto rarok;
992
993         case SIOCSIFFLAGS:
                    /* on an alias label, clearing IFF_UP deletes that address;
                     * without a colon the flags are applied to the device itself
                     */
994                 if (colon) {
995                         ret = -EADDRNOTAVAIL;
996                         if (!ifa)
997                                 break;
998                         ret = 0;
999                         if (!(ifr.ifr_flags & IFF_UP))
1000                                 inet_del_ifa(in_dev, ifap, 1);
1001                         break;
1002                 }
1003                 ret = dev_change_flags(dev, ifr.ifr_flags);
1004                 break;
1005
1006         case SIOCSIFADDR:       /* Set interface address (and family) */
1007                 ret = -EINVAL;
1008                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1009                         break;
1010
1011                 if (!ifa) {
1012                         ret = -ENOBUFS;
1013                         ifa = inet_alloc_ifa();
1014                         if (!ifa)
1015                                 break;
1016                         INIT_HLIST_NODE(&ifa->hash);
1017                         if (colon)
1018                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1019                         else
1020                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1021                 } else {
1022                         ret = 0;
1023                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1024                                 break;
                             /* unlink without destroying (last arg 0); it is re-added below */
1025                         inet_del_ifa(in_dev, ifap, 0);
1026                         ifa->ifa_broadcast = 0;
1027                         ifa->ifa_scope = 0;
1028                 }
1029
1030                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1031
1032                 if (!(dev->flags & IFF_POINTOPOINT)) {
1033                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1034                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1035                         if ((dev->flags & IFF_BROADCAST) &&
1036                             ifa->ifa_prefixlen < 31)
1037                                 ifa->ifa_broadcast = ifa->ifa_address |
1038                                                      ~ifa->ifa_mask;
1039                 } else {
1040                         ifa->ifa_prefixlen = 32;
1041                         ifa->ifa_mask = inet_make_mask(32);
1042                 }
1043                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1044                 ret = inet_set_ifa(dev, ifa);
1045                 break;
1046
1047         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1048                 ret = 0;
1049                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1050                         inet_del_ifa(in_dev, ifap, 0);
1051                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1052                         inet_insert_ifa(ifa);
1053                 }
1054                 break;
1055
1056         case SIOCSIFDSTADDR:    /* Set the destination address */
1057                 ret = 0;
1058                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1059                         break;
1060                 ret = -EINVAL;
1061                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1062                         break;
1063                 ret = 0;
1064                 inet_del_ifa(in_dev, ifap, 0);
1065                 ifa->ifa_address = sin->sin_addr.s_addr;
1066                 inet_insert_ifa(ifa);
1067                 break;
1068
1069         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1070
1071                 /*
1072                  *      The mask we set must be legal.
1073                  */
1074                 ret = -EINVAL;
1075                 if (bad_mask(sin->sin_addr.s_addr, 0))
1076                         break;
1077                 ret = 0;
1078                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1079                         __be32 old_mask = ifa->ifa_mask;
1080                         inet_del_ifa(in_dev, ifap, 0);
1081                         ifa->ifa_mask = sin->sin_addr.s_addr;
1082                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1083
1084                         /* See if current broadcast address matches
1085                          * with current netmask, then recalculate
1086                          * the broadcast address. Otherwise it's a
1087                          * funny address, so don't touch it since
1088                          * the user seems to know what (s)he's doing...
1089                          */
1090                         if ((dev->flags & IFF_BROADCAST) &&
1091                             (ifa->ifa_prefixlen < 31) &&
1092                             (ifa->ifa_broadcast ==
1093                              (ifa->ifa_local|~old_mask))) {
1094                                 ifa->ifa_broadcast = (ifa->ifa_local |
1095                                                       ~sin->sin_addr.s_addr);
1096                         }
1097                         inet_insert_ifa(ifa);
1098                 }
1099                 break;
1100         }
1101 done:
1102         rtnl_unlock();
1103 out:
1104         return ret;
     /* SIOCG* exit: drop RTNL first, then copy the filled-in ifreq back to userspace */
1105 rarok:
1106         rtnl_unlock();
1107         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1108         goto out;
1109 }
1110
1111 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1112 {
1113         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1114         struct in_ifaddr *ifa;
1115         struct ifreq ifr;
1116         int done = 0;
1117
1118         if (!in_dev)
1119                 goto out;
1120
1121         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1122                 if (!buf) {
1123                         done += sizeof(ifr);
1124                         continue;
1125                 }
1126                 if (len < (int) sizeof(ifr))
1127                         break;
1128                 memset(&ifr, 0, sizeof(struct ifreq));
1129                 if (ifa->ifa_label)
1130                         strcpy(ifr.ifr_name, ifa->ifa_label);
1131                 else
1132                         strcpy(ifr.ifr_name, dev->name);
1133
1134                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1135                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1136                                                                 ifa->ifa_local;
1137
1138                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1139                         done = -EFAULT;
1140                         break;
1141                 }
1142                 buf  += sizeof(struct ifreq);
1143                 len  -= sizeof(struct ifreq);
1144                 done += sizeof(struct ifreq);
1145         }
1146 out:
1147         return done;
1148 }
1149
1150 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1151 {
1152         __be32 addr = 0;
1153         struct in_device *in_dev;
1154         struct net *net = dev_net(dev);
1155
1156         rcu_read_lock();
1157         in_dev = __in_dev_get_rcu(dev);
1158         if (!in_dev)
1159                 goto no_in_dev;
1160
1161         for_primary_ifa(in_dev) {
1162                 if (ifa->ifa_scope > scope)
1163                         continue;
1164                 if (!dst || inet_ifa_match(dst, ifa)) {
1165                         addr = ifa->ifa_local;
1166                         break;
1167                 }
1168                 if (!addr)
1169                         addr = ifa->ifa_local;
1170         } endfor_ifa(in_dev);
1171
1172         if (addr)
1173                 goto out_unlock;
1174 no_in_dev:
1175
1176         /* Not loopback addresses on loopback should be preferred
1177            in this case. It is importnat that lo is the first interface
1178            in dev_base list.
1179          */
1180         for_each_netdev_rcu(net, dev) {
1181                 in_dev = __in_dev_get_rcu(dev);
1182                 if (!in_dev)
1183                         continue;
1184
1185                 for_primary_ifa(in_dev) {
1186                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1187                             ifa->ifa_scope <= scope) {
1188                                 addr = ifa->ifa_local;
1189                                 goto out_unlock;
1190                         }
1191                 } endfor_ifa(in_dev);
1192         }
1193 out_unlock:
1194         rcu_read_unlock();
1195         return addr;
1196 }
1197 EXPORT_SYMBOL(inet_select_addr);
1198
1199 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1200                               __be32 local, int scope)
1201 {
1202         int same = 0;
1203         __be32 addr = 0;
1204
1205         for_ifa(in_dev) {
1206                 if (!addr &&
1207                     (local == ifa->ifa_local || !local) &&
1208                     ifa->ifa_scope <= scope) {
1209                         addr = ifa->ifa_local;
1210                         if (same)
1211                                 break;
1212                 }
1213                 if (!same) {
1214                         same = (!local || inet_ifa_match(local, ifa)) &&
1215                                 (!dst || inet_ifa_match(dst, ifa));
1216                         if (same && addr) {
1217                                 if (local || !dst)
1218                                         break;
1219                                 /* Is the selected addr into dst subnet? */
1220                                 if (inet_ifa_match(addr, ifa))
1221                                         break;
1222                                 /* No, then can we use new local src? */
1223                                 if (ifa->ifa_scope <= scope) {
1224                                         addr = ifa->ifa_local;
1225                                         break;
1226                                 }
1227                                 /* search for large dst subnet for addr */
1228                                 same = 0;
1229                         }
1230                 }
1231         } endfor_ifa(in_dev);
1232
1233         return same ? addr : 0;
1234 }
1235
1236 /*
1237  * Confirm that local IP address exists using wildcards:
1238  * - in_dev: only on this interface, 0=any interface
1239  * - dst: only in the same subnet as dst, 0=any dst
1240  * - local: address, 0=autoselect the local address
1241  * - scope: maximum allowed scope value for the local address
1242  */
1243 __be32 inet_confirm_addr(struct in_device *in_dev,
1244                          __be32 dst, __be32 local, int scope)
1245 {
1246         __be32 addr = 0;
1247         struct net_device *dev;
1248         struct net *net;
1249
1250         if (scope != RT_SCOPE_LINK)
1251                 return confirm_addr_indev(in_dev, dst, local, scope);
1252
1253         net = dev_net(in_dev->dev);
1254         rcu_read_lock();
1255         for_each_netdev_rcu(net, dev) {
1256                 in_dev = __in_dev_get_rcu(dev);
1257                 if (in_dev) {
1258                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1259                         if (addr)
1260                                 break;
1261                 }
1262         }
1263         rcu_read_unlock();
1264
1265         return addr;
1266 }
1267 EXPORT_SYMBOL(inet_confirm_addr);
1268
1269 /*
1270  *      Device notifier
1271  */
1272
1273 int register_inetaddr_notifier(struct notifier_block *nb)
1274 {
1275         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1276 }
1277 EXPORT_SYMBOL(register_inetaddr_notifier);
1278
1279 int unregister_inetaddr_notifier(struct notifier_block *nb)
1280 {
1281         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1282 }
1283 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1284
1285 /* Rename ifa_labels for a device name change. Make some effort to preserve
1286  * existing alias numbering and to create unique labels if possible.
1287 */
1288 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1289 {
1290         struct in_ifaddr *ifa;
1291         int named = 0;
1292
1293         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1294                 char old[IFNAMSIZ], *dot;
1295
1296                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1297                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1298                 if (named++ == 0)
1299                         goto skip;
1300                 dot = strchr(old, ':');
1301                 if (dot == NULL) {
1302                         sprintf(old, ":%d", named);
1303                         dot = old;
1304                 }
1305                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1306                         strcat(ifa->ifa_label, dot);
1307                 else
1308                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1309 skip:
1310                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1311         }
1312 }
1313
/* Tell whether @mtu is large enough for IPv4 to stay enabled (at least 68). */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1318
1319 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1320                                         struct in_device *in_dev)
1321
1322 {
1323         struct in_ifaddr *ifa;
1324
1325         for (ifa = in_dev->ifa_list; ifa;
1326              ifa = ifa->ifa_next) {
1327                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1328                          ifa->ifa_local, dev,
1329                          ifa->ifa_local, NULL,
1330                          dev->dev_addr, NULL);
1331         }
1332 }
1333
1334 /* Called only under RTNL semaphore */
1335
/*
 * inetdev_event - netdevice notifier callback maintaining IPv4 device state.
 * @this: notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr: notifier info from which the net_device is extracted
 *
 * If the device has no in_device yet, one is created on NETDEV_REGISTER
 * (with NOXFRM/NOPOLICY set for loopback) or on NETDEV_CHANGEMTU once the
 * MTU is valid again; all other events are ignored in that state.
 * Otherwise the event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails on registration,
 * in which case -ENOMEM is reported through notifier_from_errno().
 */
1336 static int inetdev_event(struct notifier_block *this, unsigned long event,
1337                          void *ptr)
1338 {
1339         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1340         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1341
1342         ASSERT_RTNL();
1343
1344         if (!in_dev) {
1345                 if (event == NETDEV_REGISTER) {
1346                         in_dev = inetdev_init(dev);
1347                         if (!in_dev)
1348                                 return notifier_from_errno(-ENOMEM);
1349                         if (dev->flags & IFF_LOOPBACK) {
1350                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1351                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1352                         }
1353                 } else if (event == NETDEV_CHANGEMTU) {
1354                         /* Re-enabling IP */
1355                         if (inetdev_valid_mtu(dev->mtu))
1356                                 in_dev = inetdev_init(dev);
1357                 }
1358                 goto out;
1359         }
1360
1361         switch (event) {
1362         case NETDEV_REGISTER:
                     /* registering a device that already has an in_device is unexpected */
1363                 pr_debug("%s: bug\n", __func__);
1364                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1365                 break;
1366         case NETDEV_UP:
1367                 if (!inetdev_valid_mtu(dev->mtu))
1368                         break;
                     /* loopback gets INADDR_LOOPBACK/8 with host scope added when it comes up */
1369                 if (dev->flags & IFF_LOOPBACK) {
1370                         struct in_ifaddr *ifa = inet_alloc_ifa();
1371
1372                         if (ifa) {
1373                                 INIT_HLIST_NODE(&ifa->hash);
1374                                 ifa->ifa_local =
1375                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1376                                 ifa->ifa_prefixlen = 8;
1377                                 ifa->ifa_mask = inet_make_mask(8);
1378                                 in_dev_hold(in_dev);
1379                                 ifa->ifa_dev = in_dev;
1380                                 ifa->ifa_scope = RT_SCOPE_HOST;
1381                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1382                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1383                                                  INFINITY_LIFE_TIME);
1384                                 inet_insert_ifa(ifa);
1385                         }
1386                 }
1387                 ip_mc_up(in_dev);
1388                 /* fall through */
1389         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when arp_notify is enabled */
1390                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1391                         break;
1392                 /* fall through */
1393         case NETDEV_NOTIFY_PEERS:
1394                 /* Send gratuitous ARP to notify of link change */
1395                 inetdev_send_gratuitous_arp(dev, in_dev);
1396                 break;
1397         case NETDEV_DOWN:
1398                 ip_mc_down(in_dev);
1399                 break;
1400         case NETDEV_PRE_TYPE_CHANGE:
1401                 ip_mc_unmap(in_dev);
1402                 break;
1403         case NETDEV_POST_TYPE_CHANGE:
1404                 ip_mc_remap(in_dev);
1405                 break;
1406         case NETDEV_CHANGEMTU:
1407                 if (inetdev_valid_mtu(dev->mtu))
1408                         break;
1409                 /* disable IP when MTU is not enough */
                     /* fall through */
1410         case NETDEV_UNREGISTER:
1411                 inetdev_destroy(in_dev);
1412                 break;
1413         case NETDEV_CHANGENAME:
1414                 /* Do not notify about label change, this event is
1415                  * not interesting to applications using netlink.
1416                  */
                     /* NOTE(review): inetdev_changename() in this file calls
                      * rtmsg_ifa(RTM_NEWADDR, ...) for every address, so the
                      * comment above looks stale - confirm.
                      */
1417                 inetdev_changename(dev, in_dev);
1418
                     /* re-register so the sysctl entries follow the new name (presumably) */
1419                 devinet_sysctl_unregister(in_dev);
1420                 devinet_sysctl_register(in_dev);
1421                 break;
1422         }
1423 out:
1424         return NOTIFY_DONE;
1425 }
1426
/* Notifier block whose callback is inetdev_event(). */
1427 static struct notifier_block ip_netdev_notifier = {
1428         .notifier_call = inetdev_event,
1429 };
1430
1431 static size_t inet_nlmsg_size(void)
1432 {
1433         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1434                + nla_total_size(4) /* IFA_ADDRESS */
1435                + nla_total_size(4) /* IFA_LOCAL */
1436                + nla_total_size(4) /* IFA_BROADCAST */
1437                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1438 }
1439
1440 static inline u32 cstamp_delta(unsigned long cstamp)
1441 {
1442         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1443 }
1444
1445 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1446                          unsigned long tstamp, u32 preferred, u32 valid)
1447 {
1448         struct ifa_cacheinfo ci;
1449
1450         ci.cstamp = cstamp_delta(cstamp);
1451         ci.tstamp = cstamp_delta(tstamp);
1452         ci.ifa_prefered = preferred;
1453         ci.ifa_valid = valid;
1454
1455         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1456 }
1457
1458 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1459                             u32 portid, u32 seq, int event, unsigned int flags)
1460 {
1461         struct ifaddrmsg *ifm;
1462         struct nlmsghdr  *nlh;
1463         u32 preferred, valid;
1464
1465         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1466         if (nlh == NULL)
1467                 return -EMSGSIZE;
1468
1469         ifm = nlmsg_data(nlh);
1470         ifm->ifa_family = AF_INET;
1471         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1472         ifm->ifa_flags = ifa->ifa_flags;
1473         ifm->ifa_scope = ifa->ifa_scope;
1474         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1475
1476         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1477                 preferred = ifa->ifa_preferred_lft;
1478                 valid = ifa->ifa_valid_lft;
1479                 if (preferred != INFINITY_LIFE_TIME) {
1480                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1481
1482                         if (preferred > tval)
1483                                 preferred -= tval;
1484                         else
1485                                 preferred = 0;
1486                         if (valid != INFINITY_LIFE_TIME) {
1487                                 if (valid > tval)
1488                                         valid -= tval;
1489                                 else
1490                                         valid = 0;
1491                         }
1492                 }
1493         } else {
1494                 preferred = INFINITY_LIFE_TIME;
1495                 valid = INFINITY_LIFE_TIME;
1496         }
1497         if ((ifa->ifa_address &&
1498              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1499             (ifa->ifa_local &&
1500              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1501             (ifa->ifa_broadcast &&
1502              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1503             (ifa->ifa_label[0] &&
1504              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1505             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1506                           preferred, valid))
1507                 goto nla_put_failure;
1508
1509         return nlmsg_end(skb, nlh);
1510
1511 nla_put_failure:
1512         nlmsg_cancel(skb, nlh);
1513         return -EMSGSIZE;
1514 }
1515
/*
 * inet_dump_ifaddr - netlink dump callback emitting every IPv4 address.
 * @skb: buffer being filled with RTM_NEWADDR messages
 * @cb: dump state; cb->args[0..2] hold the resume position
 *      (device hash bucket, device index within the bucket, address index
 *      within the device)
 *
 * Walks all device hash buckets of the namespace under rcu_read_lock() and
 * stops as soon as inet_fill_ifaddr() cannot add another message; the
 * position reached is saved in cb->args so the walk can resume from there.
 *
 * Returns skb->len.
 */
1516 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1517 {
1518         struct net *net = sock_net(skb->sk);
1519         int h, s_h;
1520         int idx, s_idx;
1521         int ip_idx, s_ip_idx;
1522         struct net_device *dev;
1523         struct in_device *in_dev;
1524         struct in_ifaddr *ifa;
1525         struct hlist_head *head;
1526
             /* s_* are the saved start positions from the previous invocation */
1527         s_h = cb->args[0];
1528         s_idx = idx = cb->args[1];
1529         s_ip_idx = ip_idx = cb->args[2];
1530
1531         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1532                 idx = 0;
1533                 head = &net->dev_index_head[h];
1534                 rcu_read_lock();
                     /* sequence value checked by nl_dump_check_consistent() below */
1535                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1536                           net->dev_base_seq;
1537                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1538                         if (idx < s_idx)
1539                                 goto cont;
                             /* past the resume device: start from its first address */
1540                         if (h > s_h || idx > s_idx)
1541                                 s_ip_idx = 0;
1542                         in_dev = __in_dev_get_rcu(dev);
1543                         if (!in_dev)
1544                                 goto cont;
1545
1546                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1547                              ifa = ifa->ifa_next, ip_idx++) {
1548                                 if (ip_idx < s_ip_idx)
1549                                         continue;
1550                                 if (inet_fill_ifaddr(skb, ifa,
1551                                              NETLINK_CB(cb->skb).portid,
1552                                              cb->nlh->nlmsg_seq,
1553                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1554                                         rcu_read_unlock();
1555                                         goto done;
1556                                 }
1557                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1558                         }
1559 cont:
1560                         idx++;
1561                 }
1562                 rcu_read_unlock();
1563         }
1564
1565 done:
             /* remember where to resume on the next call */
1566         cb->args[0] = h;
1567         cb->args[1] = idx;
1568         cb->args[2] = ip_idx;
1569
1570         return skb->len;
1571 }
1572
1573 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1574                       u32 portid)
1575 {
1576         struct sk_buff *skb;
1577         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1578         int err = -ENOBUFS;
1579         struct net *net;
1580
1581         net = dev_net(ifa->ifa_dev->dev);
1582         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1583         if (skb == NULL)
1584                 goto errout;
1585
1586         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1587         if (err < 0) {
1588                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1589                 WARN_ON(err == -EMSGSIZE);
1590                 kfree_skb(skb);
1591                 goto errout;
1592         }
1593         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1594         return;
1595 errout:
1596         if (err < 0)
1597                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1598 }
1599
1600 static size_t inet_get_link_af_size(const struct net_device *dev)
1601 {
1602         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1603
1604         if (!in_dev)
1605                 return 0;
1606
1607         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1608 }
1609
1610 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1611 {
1612         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1613         struct nlattr *nla;
1614         int i;
1615
1616         if (!in_dev)
1617                 return -ENODATA;
1618
1619         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1620         if (nla == NULL)
1621                 return -EMSGSIZE;
1622
1623         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1624                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1625
1626         return 0;
1627 }
1628
/*
 * Attribute policy used by inet_validate_link_af() when parsing the nested
 * IPv4 link attributes: IFLA_INET_CONF must itself be a nested attribute.
 */
1629 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1630         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1631 };
1632
1633 static int inet_validate_link_af(const struct net_device *dev,
1634                                  const struct nlattr *nla)
1635 {
1636         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1637         int err, rem;
1638
1639         if (dev && !__in_dev_get_rtnl(dev))
1640                 return -EAFNOSUPPORT;
1641
1642         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1643         if (err < 0)
1644                 return err;
1645
1646         if (tb[IFLA_INET_CONF]) {
1647                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1648                         int cfgid = nla_type(a);
1649
1650                         if (nla_len(a) < 4)
1651                                 return -EINVAL;
1652
1653                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1654                                 return -EINVAL;
1655                 }
1656         }
1657
1658         return 0;
1659 }
1660
1661 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1662 {
1663         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1664         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1665         int rem;
1666
1667         if (!in_dev)
1668                 return -EAFNOSUPPORT;
1669
1670         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1671                 BUG();
1672
1673         if (tb[IFLA_INET_CONF]) {
1674                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1675                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1676         }
1677
1678         return 0;
1679 }
1680
1681 static int inet_netconf_msgsize_devconf(int type)
1682 {
1683         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1684                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1685
1686         /* type -1 is used for ALL */
1687         if (type == -1 || type == NETCONFA_FORWARDING)
1688                 size += nla_total_size(4);
1689         if (type == -1 || type == NETCONFA_RP_FILTER)
1690                 size += nla_total_size(4);
1691         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1692                 size += nla_total_size(4);
1693
1694         return size;
1695 }
1696
1697 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1698                                      struct ipv4_devconf *devconf, u32 portid,
1699                                      u32 seq, int event, unsigned int flags,
1700                                      int type)
1701 {
1702         struct nlmsghdr  *nlh;
1703         struct netconfmsg *ncm;
1704
1705         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1706                         flags);
1707         if (nlh == NULL)
1708                 return -EMSGSIZE;
1709
1710         ncm = nlmsg_data(nlh);
1711         ncm->ncm_family = AF_INET;
1712
1713         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1714                 goto nla_put_failure;
1715
1716         /* type -1 is used for ALL */
1717         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1718             nla_put_s32(skb, NETCONFA_FORWARDING,
1719                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1720                 goto nla_put_failure;
1721         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1722             nla_put_s32(skb, NETCONFA_RP_FILTER,
1723                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1724                 goto nla_put_failure;
1725         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1726             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1727                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1728                 goto nla_put_failure;
1729
1730         return nlmsg_end(skb, nlh);
1731
1732 nla_put_failure:
1733         nlmsg_cancel(skb, nlh);
1734         return -EMSGSIZE;
1735 }
1736
1737 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1738                                  struct ipv4_devconf *devconf)
1739 {
1740         struct sk_buff *skb;
1741         int err = -ENOBUFS;
1742
1743         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1744         if (skb == NULL)
1745                 goto errout;
1746
1747         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1748                                         RTM_NEWNETCONF, 0, type);
1749         if (err < 0) {
1750                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1751                 WARN_ON(err == -EMSGSIZE);
1752                 kfree_skb(skb);
1753                 goto errout;
1754         }
1755         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1756         return;
1757 errout:
1758         if (err < 0)
1759                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1760 }
1761
1762 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1763         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1764         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1765         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1766 };
1767
1768 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1769                                     struct nlmsghdr *nlh)
1770 {
1771         struct net *net = sock_net(in_skb->sk);
1772         struct nlattr *tb[NETCONFA_MAX+1];
1773         struct netconfmsg *ncm;
1774         struct sk_buff *skb;
1775         struct ipv4_devconf *devconf;
1776         struct in_device *in_dev;
1777         struct net_device *dev;
1778         int ifindex;
1779         int err;
1780
1781         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1782                           devconf_ipv4_policy);
1783         if (err < 0)
1784                 goto errout;
1785
1786         err = EINVAL;
1787         if (!tb[NETCONFA_IFINDEX])
1788                 goto errout;
1789
1790         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1791         switch (ifindex) {
1792         case NETCONFA_IFINDEX_ALL:
1793                 devconf = net->ipv4.devconf_all;
1794                 break;
1795         case NETCONFA_IFINDEX_DEFAULT:
1796                 devconf = net->ipv4.devconf_dflt;
1797                 break;
1798         default:
1799                 dev = __dev_get_by_index(net, ifindex);
1800                 if (dev == NULL)
1801                         goto errout;
1802                 in_dev = __in_dev_get_rtnl(dev);
1803                 if (in_dev == NULL)
1804                         goto errout;
1805                 devconf = &in_dev->cnf;
1806                 break;
1807         }
1808
1809         err = -ENOBUFS;
1810         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1811         if (skb == NULL)
1812                 goto errout;
1813
1814         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1815                                         NETLINK_CB(in_skb).portid,
1816                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1817                                         -1);
1818         if (err < 0) {
1819                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1820                 WARN_ON(err == -EMSGSIZE);
1821                 kfree_skb(skb);
1822                 goto errout;
1823         }
1824         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1825 errout:
1826         return err;
1827 }
1828
1829 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1830                                      struct netlink_callback *cb)
1831 {
1832         struct net *net = sock_net(skb->sk);
1833         int h, s_h;
1834         int idx, s_idx;
1835         struct net_device *dev;
1836         struct in_device *in_dev;
1837         struct hlist_head *head;
1838
1839         s_h = cb->args[0];
1840         s_idx = idx = cb->args[1];
1841
1842         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1843                 idx = 0;
1844                 head = &net->dev_index_head[h];
1845                 rcu_read_lock();
1846                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1847                           net->dev_base_seq;
1848                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1849                         if (idx < s_idx)
1850                                 goto cont;
1851                         in_dev = __in_dev_get_rcu(dev);
1852                         if (!in_dev)
1853                                 goto cont;
1854
1855                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1856                                                       &in_dev->cnf,
1857                                                       NETLINK_CB(cb->skb).portid,
1858                                                       cb->nlh->nlmsg_seq,
1859                                                       RTM_NEWNETCONF,
1860                                                       NLM_F_MULTI,
1861                                                       -1) <= 0) {
1862                                 rcu_read_unlock();
1863                                 goto done;
1864                         }
1865                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1866 cont:
1867                         idx++;
1868                 }
1869                 rcu_read_unlock();
1870         }
1871         if (h == NETDEV_HASHENTRIES) {
1872                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1873                                               net->ipv4.devconf_all,
1874                                               NETLINK_CB(cb->skb).portid,
1875                                               cb->nlh->nlmsg_seq,
1876                                               RTM_NEWNETCONF, NLM_F_MULTI,
1877                                               -1) <= 0)
1878                         goto done;
1879                 else
1880                         h++;
1881         }
1882         if (h == NETDEV_HASHENTRIES + 1) {
1883                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1884                                               net->ipv4.devconf_dflt,
1885                                               NETLINK_CB(cb->skb).portid,
1886                                               cb->nlh->nlmsg_seq,
1887                                               RTM_NEWNETCONF, NLM_F_MULTI,
1888                                               -1) <= 0)
1889                         goto done;
1890                 else
1891                         h++;
1892         }
1893 done:
1894         cb->args[0] = h;
1895         cb->args[1] = idx;
1896
1897         return skb->len;
1898 }
1899
1900 #ifdef CONFIG_SYSCTL
1901
1902 static void devinet_copy_dflt_conf(struct net *net, int i)
1903 {
1904         struct net_device *dev;
1905
1906         rcu_read_lock();
1907         for_each_netdev_rcu(net, dev) {
1908                 struct in_device *in_dev;
1909
1910                 in_dev = __in_dev_get_rcu(dev);
1911                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1912                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1913         }
1914         rcu_read_unlock();
1915 }
1916
1917 /* called with RTNL locked */
1918 static void inet_forward_change(struct net *net)
1919 {
1920         struct net_device *dev;
1921         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1922
1923         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1924         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1925         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1926                                     NETCONFA_IFINDEX_ALL,
1927                                     net->ipv4.devconf_all);
1928         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1929                                     NETCONFA_IFINDEX_DEFAULT,
1930                                     net->ipv4.devconf_dflt);
1931
1932         for_each_netdev(net, dev) {
1933                 struct in_device *in_dev;
1934                 if (on)
1935                         dev_disable_lro(dev);
1936                 rcu_read_lock();
1937                 in_dev = __in_dev_get_rcu(dev);
1938                 if (in_dev) {
1939                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1940                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941                                                     dev->ifindex, &in_dev->cnf);
1942                 }
1943                 rcu_read_unlock();
1944         }
1945 }
1946
1947 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1948                              void __user *buffer,
1949                              size_t *lenp, loff_t *ppos)
1950 {
1951         int old_value = *(int *)ctl->data;
1952         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1953         int new_value = *(int *)ctl->data;
1954
1955         if (write) {
1956                 struct ipv4_devconf *cnf = ctl->extra1;
1957                 struct net *net = ctl->extra2;
1958                 int i = (int *)ctl->data - cnf->data;
1959
1960                 set_bit(i, cnf->state);
1961
1962                 if (cnf == net->ipv4.devconf_dflt)
1963                         devinet_copy_dflt_conf(net, i);
1964                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1965                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1966                         if ((new_value == 0) && (old_value != 0))
1967                                 rt_cache_flush(net);
1968                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1969                     new_value != old_value) {
1970                         int ifindex;
1971
1972                         if (cnf == net->ipv4.devconf_dflt)
1973                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1974                         else if (cnf == net->ipv4.devconf_all)
1975                                 ifindex = NETCONFA_IFINDEX_ALL;
1976                         else {
1977                                 struct in_device *idev =
1978                                         container_of(cnf, struct in_device,
1979                                                      cnf);
1980                                 ifindex = idev->dev->ifindex;
1981                         }
1982                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1983                                                     ifindex, cnf);
1984                 }
1985         }
1986
1987         return ret;
1988 }
1989
1990 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1991                                   void __user *buffer,
1992                                   size_t *lenp, loff_t *ppos)
1993 {
1994         int *valp = ctl->data;
1995         int val = *valp;
1996         loff_t pos = *ppos;
1997         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1998
1999         if (write && *valp != val) {
2000                 struct net *net = ctl->extra2;
2001
2002                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2003                         if (!rtnl_trylock()) {
2004                                 /* Restore the original values before restarting */
2005                                 *valp = val;
2006                                 *ppos = pos;
2007                                 return restart_syscall();
2008                         }
2009                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2010                                 inet_forward_change(net);
2011                         } else {
2012                                 struct ipv4_devconf *cnf = ctl->extra1;
2013                                 struct in_device *idev =
2014                                         container_of(cnf, struct in_device, cnf);
2015                                 if (*valp)
2016                                         dev_disable_lro(idev->dev);
2017                                 inet_netconf_notify_devconf(net,
2018                                                             NETCONFA_FORWARDING,
2019                                                             idev->dev->ifindex,
2020                                                             cnf);
2021                         }
2022                         rtnl_unlock();
2023                         rt_cache_flush(net);
2024                 } else
2025                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2026                                                     NETCONFA_IFINDEX_DEFAULT,
2027                                                     net->ipv4.devconf_dflt);
2028         }
2029
2030         return ret;
2031 }
2032
2033 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2034                                 void __user *buffer,
2035                                 size_t *lenp, loff_t *ppos)
2036 {
2037         int *valp = ctl->data;
2038         int val = *valp;
2039         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2040         struct net *net = ctl->extra2;
2041
2042         if (write && *valp != val)
2043                 rt_cache_flush(net);
2044
2045         return ret;
2046 }
2047
/*
 * Template for one devconf ctl_table entry.
 *
 * @attr is the IPV4_DEVCONF_ suffix, @name the procfs file name, @mval the
 * file mode and @proc the handler. The entry initially points at slot
 * IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf;
 * __devinet_sysctl_register() rebases it onto the real target.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.mode		= mval, \
		.maxlen		= sizeof(int), \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= proc, \
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2067
2068 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2069         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2070
2071 static struct devinet_sysctl_table {
2072         struct ctl_table_header *sysctl_header;
2073         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2074 } devinet_sysctl = {
2075         .devinet_vars = {
2076                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2077                                              devinet_sysctl_forward),
2078                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2079
2080                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2081                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2082                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2083                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2084                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2085                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2086                                         "accept_source_route"),
2087                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2088                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2089                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2090                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2091                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2092                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2093                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2094                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2095                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2096                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2097                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2098                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2099                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2100
2101                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2102                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2103                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2104                                               "force_igmp_version"),
2105                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2106                                               "promote_secondaries"),
2107                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2108                                               "route_localnet"),
2109         },
2110 };
2111
2112 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2113                                         struct ipv4_devconf *p)
2114 {
2115         int i;
2116         struct devinet_sysctl_table *t;
2117         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2118
2119         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2120         if (!t)
2121                 goto out;
2122
2123         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2124                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2125                 t->devinet_vars[i].extra1 = p;
2126                 t->devinet_vars[i].extra2 = net;
2127         }
2128
2129         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2130
2131         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2132         if (!t->sysctl_header)
2133                 goto free;
2134
2135         p->sysctl = t;
2136         return 0;
2137
2138 free:
2139         kfree(t);
2140 out:
2141         return -ENOBUFS;
2142 }
2143
2144 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2145 {
2146         struct devinet_sysctl_table *t = cnf->sysctl;
2147
2148         if (t == NULL)
2149                 return;
2150
2151         cnf->sysctl = NULL;
2152         unregister_net_sysctl_table(t->sysctl_header);
2153         kfree(t);
2154 }
2155
2156 static void devinet_sysctl_register(struct in_device *idev)
2157 {
2158         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2159         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2160                                         &idev->cnf);
2161 }
2162
2163 static void devinet_sysctl_unregister(struct in_device *idev)
2164 {
2165         __devinet_sysctl_unregister(&idev->cnf);
2166         neigh_sysctl_unregister(idev->arp_parms);
2167 }
2168
2169 static struct ctl_table ctl_forward_entry[] = {
2170         {
2171                 .procname       = "ip_forward",
2172                 .data           = &ipv4_devconf.data[
2173                                         IPV4_DEVCONF_FORWARDING - 1],
2174                 .maxlen         = sizeof(int),
2175                 .mode           = 0644,
2176                 .proc_handler   = devinet_sysctl_forward,
2177                 .extra1         = &ipv4_devconf,
2178                 .extra2         = &init_net,
2179         },
2180         { },
2181 };
2182 #endif
2183
2184 static __net_init int devinet_init_net(struct net *net)
2185 {
2186         int err;
2187         struct ipv4_devconf *all, *dflt;
2188 #ifdef CONFIG_SYSCTL
2189         struct ctl_table *tbl = ctl_forward_entry;
2190         struct ctl_table_header *forw_hdr;
2191 #endif
2192
2193         err = -ENOMEM;
2194         all = &ipv4_devconf;
2195         dflt = &ipv4_devconf_dflt;
2196
2197         if (!net_eq(net, &init_net)) {
2198                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2199                 if (all == NULL)
2200                         goto err_alloc_all;
2201
2202                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2203                 if (dflt == NULL)
2204                         goto err_alloc_dflt;
2205
2206 #ifdef CONFIG_SYSCTL
2207                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2208                 if (tbl == NULL)
2209                         goto err_alloc_ctl;
2210
2211                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2212                 tbl[0].extra1 = all;
2213                 tbl[0].extra2 = net;
2214 #endif
2215         }
2216
2217 #ifdef CONFIG_SYSCTL
2218         err = __devinet_sysctl_register(net, "all", all);
2219         if (err < 0)
2220                 goto err_reg_all;
2221
2222         err = __devinet_sysctl_register(net, "default", dflt);
2223         if (err < 0)
2224                 goto err_reg_dflt;
2225
2226         err = -ENOMEM;
2227         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2228         if (forw_hdr == NULL)
2229                 goto err_reg_ctl;
2230         net->ipv4.forw_hdr = forw_hdr;
2231 #endif
2232
2233         net->ipv4.devconf_all = all;
2234         net->ipv4.devconf_dflt = dflt;
2235         return 0;
2236
2237 #ifdef CONFIG_SYSCTL
2238 err_reg_ctl:
2239         __devinet_sysctl_unregister(dflt);
2240 err_reg_dflt:
2241         __devinet_sysctl_unregister(all);
2242 err_reg_all:
2243         if (tbl != ctl_forward_entry)
2244                 kfree(tbl);
2245 err_alloc_ctl:
2246 #endif
2247         if (dflt != &ipv4_devconf_dflt)
2248                 kfree(dflt);
2249 err_alloc_dflt:
2250         if (all != &ipv4_devconf)
2251                 kfree(all);
2252 err_alloc_all:
2253         return err;
2254 }
2255
2256 static __net_exit void devinet_exit_net(struct net *net)
2257 {
2258 #ifdef CONFIG_SYSCTL
2259         struct ctl_table *tbl;
2260
2261         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2262         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2263         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2264         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2265         kfree(tbl);
2266 #endif
2267         kfree(net->ipv4.devconf_dflt);
2268         kfree(net->ipv4.devconf_all);
2269 }
2270
2271 static __net_initdata struct pernet_operations devinet_ops = {
2272         .init = devinet_init_net,
2273         .exit = devinet_exit_net,
2274 };
2275
2276 static struct rtnl_af_ops inet_af_ops = {
2277         .family           = AF_INET,
2278         .fill_link_af     = inet_fill_link_af,
2279         .get_link_af_size = inet_get_link_af_size,
2280         .validate_link_af = inet_validate_link_af,
2281         .set_link_af      = inet_set_link_af,
2282 };
2283
2284 void __init devinet_init(void)
2285 {
2286         int i;
2287
2288         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2289                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2290
2291         register_pernet_subsys(&devinet_ops);
2292
2293         register_gifconf(PF_INET, inet_gifconf);
2294         register_netdevice_notifier(&ip_netdev_notifier);
2295
2296         schedule_delayed_work(&check_lifetime_work, 0);
2297
2298         rtnl_af_register(&inet_af_ops);
2299
2300         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2301         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2302         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2303         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2304                       inet_netconf_dump_devconf, NULL);
2305 }
2306