]> rtime.felk.cvut.cz Git - linux-imx.git/blob - net/ipv4/devinet.c
virtio-scsi: Fix virtqueue affinity setup
[linux-imx.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/*
 * Read option @attr from the default device configuration of namespace
 * @net (net->ipv4.devconf_dflt) by way of IPV4_DEVCONF().
 *
 * @net is parenthesised so that any pointer-valued expression can be
 * passed, not only a plain identifier.  @attr is handed to IPV4_DEVCONF()
 * untouched.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
98 };
99
100 #define IN4_ADDR_HSIZE_SHIFT    8
101 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
102
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145                 if (ifa->ifa_local == addr) {
146                         struct net_device *dev = ifa->ifa_dev->dev;
147
148                         if (!net_eq(dev_net(dev), net))
149                                 continue;
150                         result = dev;
151                         break;
152                 }
153         }
154         if (!result) {
155                 struct flowi4 fl4 = { .daddr = addr };
156                 struct fib_result res = { 0 };
157                 struct fib_table *local;
158
159                 /* Fallback to FIB local table so that communication
160                  * over loopback subnets work.
161                  */
162                 local = fib_get_table(net, RT_TABLE_LOCAL);
163                 if (local &&
164                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165                     res.type == RTN_LOCAL)
166                         result = FIB_RES_DEV(res);
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179                          int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191
192 /* Locks all the inet devices. */
193
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202         if (ifa->ifa_dev)
203                 in_dev_put(ifa->ifa_dev);
204         kfree(ifa);
205 }
206
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214         struct net_device *dev = idev->dev;
215
216         WARN_ON(idev->ifa_list);
217         WARN_ON(idev->mc_list);
218         kfree(rcu_dereference_protected(idev->mc_hash, 1));
219 #ifdef NET_REFCNT_DEBUG
220         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222         dev_put(dev);
223         if (!idev->dead)
224                 pr_err("Freeing alive in_device %p\n", idev);
225         else
226                 kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232         struct in_device *in_dev;
233
234         ASSERT_RTNL();
235
236         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237         if (!in_dev)
238                 goto out;
239         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240                         sizeof(in_dev->cnf));
241         in_dev->cnf.sysctl = NULL;
242         in_dev->dev = dev;
243         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244         if (!in_dev->arp_parms)
245                 goto out_kfree;
246         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247                 dev_disable_lro(dev);
248         /* Reference in_dev->dev */
249         dev_hold(dev);
250         /* Account for reference dev->ip_ptr (below) */
251         in_dev_hold(in_dev);
252
253         devinet_sysctl_register(in_dev);
254         ip_mc_init_dev(in_dev);
255         if (dev->flags & IFF_UP)
256                 ip_mc_up(in_dev);
257
258         /* we can receive as soon as ip_ptr is set -- do this last */
259         rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261         return in_dev;
262 out_kfree:
263         kfree(in_dev);
264         in_dev = NULL;
265         goto out;
266 }
267
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270         struct in_device *idev = container_of(head, struct in_device, rcu_head);
271         in_dev_put(idev);
272 }
273
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276         struct in_ifaddr *ifa;
277         struct net_device *dev;
278
279         ASSERT_RTNL();
280
281         dev = in_dev->dev;
282
283         in_dev->dead = 1;
284
285         ip_mc_destroy_dev(in_dev);
286
287         while ((ifa = in_dev->ifa_list) != NULL) {
288                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289                 inet_free_ifa(ifa);
290         }
291
292         RCU_INIT_POINTER(dev->ip_ptr, NULL);
293
294         devinet_sysctl_unregister(in_dev);
295         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296         arp_ifdown(dev);
297
298         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303         rcu_read_lock();
304         for_primary_ifa(in_dev) {
305                 if (inet_ifa_match(a, ifa)) {
306                         if (!b || inet_ifa_match(b, ifa)) {
307                                 rcu_read_unlock();
308                                 return 1;
309                         }
310                 }
311         } endfor_ifa(in_dev);
312         rcu_read_unlock();
313         return 0;
314 }
315
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317                          int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319         struct in_ifaddr *promote = NULL;
320         struct in_ifaddr *ifa, *ifa1 = *ifap;
321         struct in_ifaddr *last_prim = in_dev->ifa_list;
322         struct in_ifaddr *prev_prom = NULL;
323         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324
325         ASSERT_RTNL();
326
327         /* 1. Deleting primary ifaddr forces deletion all secondaries
328          * unless alias promotion is set
329          **/
330
331         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333
334                 while ((ifa = *ifap1) != NULL) {
335                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336                             ifa1->ifa_scope <= ifa->ifa_scope)
337                                 last_prim = ifa;
338
339                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340                             ifa1->ifa_mask != ifa->ifa_mask ||
341                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
342                                 ifap1 = &ifa->ifa_next;
343                                 prev_prom = ifa;
344                                 continue;
345                         }
346
347                         if (!do_promote) {
348                                 inet_hash_remove(ifa);
349                                 *ifap1 = ifa->ifa_next;
350
351                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352                                 blocking_notifier_call_chain(&inetaddr_chain,
353                                                 NETDEV_DOWN, ifa);
354                                 inet_free_ifa(ifa);
355                         } else {
356                                 promote = ifa;
357                                 break;
358                         }
359                 }
360         }
361
362         /* On promotion all secondaries from subnet are changing
363          * the primary IP, we must remove all their routes silently
364          * and later to add them back with new prefsrc. Do this
365          * while all addresses are on the device list.
366          */
367         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368                 if (ifa1->ifa_mask == ifa->ifa_mask &&
369                     inet_ifa_match(ifa1->ifa_address, ifa))
370                         fib_del_ifaddr(ifa, ifa1);
371         }
372
373         /* 2. Unlink it */
374
375         *ifap = ifa1->ifa_next;
376         inet_hash_remove(ifa1);
377
378         /* 3. Announce address deletion */
379
380         /* Send message first, then call notifier.
381            At first sight, FIB update triggered by notifier
382            will refer to already deleted ifaddr, that could confuse
383            netlink listeners. It is not true: look, gated sees
384            that route deleted and if it still thinks that ifaddr
385            is valid, it will try to restore deleted routes... Grr.
386            So that, this order is correct.
387          */
388         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390
391         if (promote) {
392                 struct in_ifaddr *next_sec = promote->ifa_next;
393
394                 if (prev_prom) {
395                         prev_prom->ifa_next = promote->ifa_next;
396                         promote->ifa_next = last_prim->ifa_next;
397                         last_prim->ifa_next = promote;
398                 }
399
400                 promote->ifa_flags &= ~IFA_F_SECONDARY;
401                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402                 blocking_notifier_call_chain(&inetaddr_chain,
403                                 NETDEV_UP, promote);
404                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405                         if (ifa1->ifa_mask != ifa->ifa_mask ||
406                             !inet_ifa_match(ifa1->ifa_address, ifa))
407                                         continue;
408                         fib_add_ifaddr(ifa);
409                 }
410
411         }
412         if (destroy)
413                 inet_free_ifa(ifa1);
414 }
415
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417                          int destroy)
418 {
419         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421
422 static void check_lifetime(struct work_struct *work);
423
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427                              u32 portid)
428 {
429         struct in_device *in_dev = ifa->ifa_dev;
430         struct in_ifaddr *ifa1, **ifap, **last_primary;
431
432         ASSERT_RTNL();
433
434         if (!ifa->ifa_local) {
435                 inet_free_ifa(ifa);
436                 return 0;
437         }
438
439         ifa->ifa_flags &= ~IFA_F_SECONDARY;
440         last_primary = &in_dev->ifa_list;
441
442         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443              ifap = &ifa1->ifa_next) {
444                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445                     ifa->ifa_scope <= ifa1->ifa_scope)
446                         last_primary = &ifa1->ifa_next;
447                 if (ifa1->ifa_mask == ifa->ifa_mask &&
448                     inet_ifa_match(ifa1->ifa_address, ifa)) {
449                         if (ifa1->ifa_local == ifa->ifa_local) {
450                                 inet_free_ifa(ifa);
451                                 return -EEXIST;
452                         }
453                         if (ifa1->ifa_scope != ifa->ifa_scope) {
454                                 inet_free_ifa(ifa);
455                                 return -EINVAL;
456                         }
457                         ifa->ifa_flags |= IFA_F_SECONDARY;
458                 }
459         }
460
461         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462                 net_srandom(ifa->ifa_local);
463                 ifap = last_primary;
464         }
465
466         ifa->ifa_next = *ifap;
467         *ifap = ifa;
468
469         inet_hash_insert(dev_net(in_dev->dev), ifa);
470
471         cancel_delayed_work(&check_lifetime_work);
472         schedule_delayed_work(&check_lifetime_work, 0);
473
474         /* Send message first, then call notifier.
475            Notifier will trigger FIB update, so that
476            listeners of netlink will know about new ifaddr */
477         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479
480         return 0;
481 }
482
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485         return __inet_insert_ifa(ifa, NULL, 0);
486 }
487
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490         struct in_device *in_dev = __in_dev_get_rtnl(dev);
491
492         ASSERT_RTNL();
493
494         if (!in_dev) {
495                 inet_free_ifa(ifa);
496                 return -ENOBUFS;
497         }
498         ipv4_devconf_setall(in_dev);
499         if (ifa->ifa_dev != in_dev) {
500                 WARN_ON(ifa->ifa_dev);
501                 in_dev_hold(in_dev);
502                 ifa->ifa_dev = in_dev;
503         }
504         if (ipv4_is_loopback(ifa->ifa_local))
505                 ifa->ifa_scope = RT_SCOPE_HOST;
506         return inet_insert_ifa(ifa);
507 }
508
509 /* Caller must hold RCU or RTNL :
510  * We dont take a reference on found in_device
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514         struct net_device *dev;
515         struct in_device *in_dev = NULL;
516
517         rcu_read_lock();
518         dev = dev_get_by_index_rcu(net, ifindex);
519         if (dev)
520                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521         rcu_read_unlock();
522         return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525
526 /* Called only from RTNL semaphored context. No locks. */
527
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529                                     __be32 mask)
530 {
531         ASSERT_RTNL();
532
533         for_primary_ifa(in_dev) {
534                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535                         return ifa;
536         } endfor_ifa(in_dev);
537         return NULL;
538 }
539
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
541 {
542         struct net *net = sock_net(skb->sk);
543         struct nlattr *tb[IFA_MAX+1];
544         struct in_device *in_dev;
545         struct ifaddrmsg *ifm;
546         struct in_ifaddr *ifa, **ifap;
547         int err = -EINVAL;
548
549         ASSERT_RTNL();
550
551         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552         if (err < 0)
553                 goto errout;
554
555         ifm = nlmsg_data(nlh);
556         in_dev = inetdev_by_index(net, ifm->ifa_index);
557         if (in_dev == NULL) {
558                 err = -ENODEV;
559                 goto errout;
560         }
561
562         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563              ifap = &ifa->ifa_next) {
564                 if (tb[IFA_LOCAL] &&
565                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566                         continue;
567
568                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569                         continue;
570
571                 if (tb[IFA_ADDRESS] &&
572                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574                         continue;
575
576                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577                 return 0;
578         }
579
580         err = -EADDRNOTAVAIL;
581 errout:
582         return err;
583 }
584
585 #define INFINITY_LIFE_TIME      0xFFFFFFFF
586
587 static void check_lifetime(struct work_struct *work)
588 {
589         unsigned long now, next, next_sec, next_sched;
590         struct in_ifaddr *ifa;
591         struct hlist_node *n;
592         int i;
593
594         now = jiffies;
595         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596
597         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
598                 bool change_needed = false;
599
600                 rcu_read_lock();
601                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
602                         unsigned long age;
603
604                         if (ifa->ifa_flags & IFA_F_PERMANENT)
605                                 continue;
606
607                         /* We try to batch several events at once. */
608                         age = (now - ifa->ifa_tstamp +
609                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
610
611                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
612                             age >= ifa->ifa_valid_lft) {
613                                 change_needed = true;
614                         } else if (ifa->ifa_preferred_lft ==
615                                    INFINITY_LIFE_TIME) {
616                                 continue;
617                         } else if (age >= ifa->ifa_preferred_lft) {
618                                 if (time_before(ifa->ifa_tstamp +
619                                                 ifa->ifa_valid_lft * HZ, next))
620                                         next = ifa->ifa_tstamp +
621                                                ifa->ifa_valid_lft * HZ;
622
623                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
624                                         change_needed = true;
625                         } else if (time_before(ifa->ifa_tstamp +
626                                                ifa->ifa_preferred_lft * HZ,
627                                                next)) {
628                                 next = ifa->ifa_tstamp +
629                                        ifa->ifa_preferred_lft * HZ;
630                         }
631                 }
632                 rcu_read_unlock();
633                 if (!change_needed)
634                         continue;
635                 rtnl_lock();
636                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
637                         unsigned long age;
638
639                         if (ifa->ifa_flags & IFA_F_PERMANENT)
640                                 continue;
641
642                         /* We try to batch several events at once. */
643                         age = (now - ifa->ifa_tstamp +
644                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
645
646                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
647                             age >= ifa->ifa_valid_lft) {
648                                 struct in_ifaddr **ifap;
649
650                                 for (ifap = &ifa->ifa_dev->ifa_list;
651                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
652                                         if (*ifap == ifa) {
653                                                 inet_del_ifa(ifa->ifa_dev,
654                                                              ifap, 1);
655                                                 break;
656                                         }
657                                 }
658                         } else if (ifa->ifa_preferred_lft !=
659                                    INFINITY_LIFE_TIME &&
660                                    age >= ifa->ifa_preferred_lft &&
661                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
662                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
663                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
664                         }
665                 }
666                 rtnl_unlock();
667         }
668
669         next_sec = round_jiffies_up(next);
670         next_sched = next;
671
672         /* If rounded timeout is accurate enough, accept it. */
673         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
674                 next_sched = next_sec;
675
676         now = jiffies;
677         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
678         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
679                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
680
681         schedule_delayed_work(&check_lifetime_work, next_sched - now);
682 }
683
684 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
685                              __u32 prefered_lft)
686 {
687         unsigned long timeout;
688
689         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
690
691         timeout = addrconf_timeout_fixup(valid_lft, HZ);
692         if (addrconf_finite_timeout(timeout))
693                 ifa->ifa_valid_lft = timeout;
694         else
695                 ifa->ifa_flags |= IFA_F_PERMANENT;
696
697         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
698         if (addrconf_finite_timeout(timeout)) {
699                 if (timeout == 0)
700                         ifa->ifa_flags |= IFA_F_DEPRECATED;
701                 ifa->ifa_preferred_lft = timeout;
702         }
703         ifa->ifa_tstamp = jiffies;
704         if (!ifa->ifa_cstamp)
705                 ifa->ifa_cstamp = ifa->ifa_tstamp;
706 }
707
708 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
709                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
710 {
711         struct nlattr *tb[IFA_MAX+1];
712         struct in_ifaddr *ifa;
713         struct ifaddrmsg *ifm;
714         struct net_device *dev;
715         struct in_device *in_dev;
716         int err;
717
718         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
719         if (err < 0)
720                 goto errout;
721
722         ifm = nlmsg_data(nlh);
723         err = -EINVAL;
724         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
725                 goto errout;
726
727         dev = __dev_get_by_index(net, ifm->ifa_index);
728         err = -ENODEV;
729         if (dev == NULL)
730                 goto errout;
731
732         in_dev = __in_dev_get_rtnl(dev);
733         err = -ENOBUFS;
734         if (in_dev == NULL)
735                 goto errout;
736
737         ifa = inet_alloc_ifa();
738         if (ifa == NULL)
739                 /*
740                  * A potential indev allocation can be left alive, it stays
741                  * assigned to its device and is destroy with it.
742                  */
743                 goto errout;
744
745         ipv4_devconf_setall(in_dev);
746         in_dev_hold(in_dev);
747
748         if (tb[IFA_ADDRESS] == NULL)
749                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
750
751         INIT_HLIST_NODE(&ifa->hash);
752         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
753         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
754         ifa->ifa_flags = ifm->ifa_flags;
755         ifa->ifa_scope = ifm->ifa_scope;
756         ifa->ifa_dev = in_dev;
757
758         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
759         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
760
761         if (tb[IFA_BROADCAST])
762                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
763
764         if (tb[IFA_LABEL])
765                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
766         else
767                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
768
769         if (tb[IFA_CACHEINFO]) {
770                 struct ifa_cacheinfo *ci;
771
772                 ci = nla_data(tb[IFA_CACHEINFO]);
773                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
774                         err = -EINVAL;
775                         goto errout;
776                 }
777                 *pvalid_lft = ci->ifa_valid;
778                 *pprefered_lft = ci->ifa_prefered;
779         }
780
781         return ifa;
782
783 errout:
784         return ERR_PTR(err);
785 }
786
787 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
788 {
789         struct in_device *in_dev = ifa->ifa_dev;
790         struct in_ifaddr *ifa1, **ifap;
791
792         if (!ifa->ifa_local)
793                 return NULL;
794
795         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
796              ifap = &ifa1->ifa_next) {
797                 if (ifa1->ifa_mask == ifa->ifa_mask &&
798                     inet_ifa_match(ifa1->ifa_address, ifa) &&
799                     ifa1->ifa_local == ifa->ifa_local)
800                         return ifa1;
801         }
802         return NULL;
803 }
804
805 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
806 {
807         struct net *net = sock_net(skb->sk);
808         struct in_ifaddr *ifa;
809         struct in_ifaddr *ifa_existing;
810         __u32 valid_lft = INFINITY_LIFE_TIME;
811         __u32 prefered_lft = INFINITY_LIFE_TIME;
812
813         ASSERT_RTNL();
814
815         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
816         if (IS_ERR(ifa))
817                 return PTR_ERR(ifa);
818
819         ifa_existing = find_matching_ifa(ifa);
820         if (!ifa_existing) {
821                 /* It would be best to check for !NLM_F_CREATE here but
822                  * userspace alreay relies on not having to provide this.
823                  */
824                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
825                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
826         } else {
827                 inet_free_ifa(ifa);
828
829                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
830                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
831                         return -EEXIST;
832                 ifa = ifa_existing;
833                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
834                 cancel_delayed_work(&check_lifetime_work);
835                 schedule_delayed_work(&check_lifetime_work, 0);
836                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
837                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
838         }
839         return 0;
840 }
841
842 /*
843  *      Determine a default network mask, based on the IP address.
844  */
845
846 static int inet_abc_len(__be32 addr)
847 {
848         int rc = -1;    /* Something else, probably a multicast. */
849
850         if (ipv4_is_zeronet(addr))
851                 rc = 0;
852         else {
853                 __u32 haddr = ntohl(addr);
854
855                 if (IN_CLASSA(haddr))
856                         rc = 8;
857                 else if (IN_CLASSB(haddr))
858                         rc = 16;
859                 else if (IN_CLASSC(haddr))
860                         rc = 24;
861         }
862
863         return rc;
864 }
865
866
867 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
868 {
869         struct ifreq ifr;
870         struct sockaddr_in sin_orig;
871         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
872         struct in_device *in_dev;
873         struct in_ifaddr **ifap = NULL;
874         struct in_ifaddr *ifa = NULL;
875         struct net_device *dev;
876         char *colon;
877         int ret = -EFAULT;
878         int tryaddrmatch = 0;
879
880         /*
881          *      Fetch the caller's info block into kernel space
882          */
883
884         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
885                 goto out;
886         ifr.ifr_name[IFNAMSIZ - 1] = 0;
887
888         /* save original address for comparison */
889         memcpy(&sin_orig, sin, sizeof(*sin));
890
891         colon = strchr(ifr.ifr_name, ':');
892         if (colon)
893                 *colon = 0;
894
895         dev_load(net, ifr.ifr_name);
896
897         switch (cmd) {
898         case SIOCGIFADDR:       /* Get interface address */
899         case SIOCGIFBRDADDR:    /* Get the broadcast address */
900         case SIOCGIFDSTADDR:    /* Get the destination address */
901         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
902                 /* Note that these ioctls will not sleep,
903                    so that we do not impose a lock.
904                    One day we will be forced to put shlock here (I mean SMP)
905                  */
906                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
907                 memset(sin, 0, sizeof(*sin));
908                 sin->sin_family = AF_INET;
909                 break;
910
911         case SIOCSIFFLAGS:
912                 ret = -EPERM;
913                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
914                         goto out;
915                 break;
916         case SIOCSIFADDR:       /* Set interface address (and family) */
917         case SIOCSIFBRDADDR:    /* Set the broadcast address */
918         case SIOCSIFDSTADDR:    /* Set the destination address */
919         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
920                 ret = -EPERM;
921                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
922                         goto out;
923                 ret = -EINVAL;
924                 if (sin->sin_family != AF_INET)
925                         goto out;
926                 break;
927         default:
928                 ret = -EINVAL;
929                 goto out;
930         }
931
932         rtnl_lock();
933
934         ret = -ENODEV;
935         dev = __dev_get_by_name(net, ifr.ifr_name);
936         if (!dev)
937                 goto done;
938
939         if (colon)
940                 *colon = ':';
941
942         in_dev = __in_dev_get_rtnl(dev);
943         if (in_dev) {
944                 if (tryaddrmatch) {
945                         /* Matthias Andree */
946                         /* compare label and address (4.4BSD style) */
947                         /* note: we only do this for a limited set of ioctls
948                            and only if the original address family was AF_INET.
949                            This is checked above. */
950                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
951                              ifap = &ifa->ifa_next) {
952                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
953                                     sin_orig.sin_addr.s_addr ==
954                                                         ifa->ifa_local) {
955                                         break; /* found */
956                                 }
957                         }
958                 }
959                 /* we didn't get a match, maybe the application is
960                    4.3BSD-style and passed in junk so we fall back to
961                    comparing just the label */
962                 if (!ifa) {
963                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
964                              ifap = &ifa->ifa_next)
965                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
966                                         break;
967                 }
968         }
969
970         ret = -EADDRNOTAVAIL;
971         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
972                 goto done;
973
974         switch (cmd) {
975         case SIOCGIFADDR:       /* Get interface address */
976                 sin->sin_addr.s_addr = ifa->ifa_local;
977                 goto rarok;
978
979         case SIOCGIFBRDADDR:    /* Get the broadcast address */
980                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
981                 goto rarok;
982
983         case SIOCGIFDSTADDR:    /* Get the destination address */
984                 sin->sin_addr.s_addr = ifa->ifa_address;
985                 goto rarok;
986
987         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
988                 sin->sin_addr.s_addr = ifa->ifa_mask;
989                 goto rarok;
990
991         case SIOCSIFFLAGS:
992                 if (colon) {
993                         ret = -EADDRNOTAVAIL;
994                         if (!ifa)
995                                 break;
996                         ret = 0;
997                         if (!(ifr.ifr_flags & IFF_UP))
998                                 inet_del_ifa(in_dev, ifap, 1);
999                         break;
1000                 }
1001                 ret = dev_change_flags(dev, ifr.ifr_flags);
1002                 break;
1003
1004         case SIOCSIFADDR:       /* Set interface address (and family) */
1005                 ret = -EINVAL;
1006                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1007                         break;
1008
1009                 if (!ifa) {
1010                         ret = -ENOBUFS;
1011                         ifa = inet_alloc_ifa();
1012                         if (!ifa)
1013                                 break;
1014                         INIT_HLIST_NODE(&ifa->hash);
1015                         if (colon)
1016                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1017                         else
1018                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1019                 } else {
1020                         ret = 0;
1021                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1022                                 break;
1023                         inet_del_ifa(in_dev, ifap, 0);
1024                         ifa->ifa_broadcast = 0;
1025                         ifa->ifa_scope = 0;
1026                 }
1027
1028                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1029
1030                 if (!(dev->flags & IFF_POINTOPOINT)) {
1031                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1032                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1033                         if ((dev->flags & IFF_BROADCAST) &&
1034                             ifa->ifa_prefixlen < 31)
1035                                 ifa->ifa_broadcast = ifa->ifa_address |
1036                                                      ~ifa->ifa_mask;
1037                 } else {
1038                         ifa->ifa_prefixlen = 32;
1039                         ifa->ifa_mask = inet_make_mask(32);
1040                 }
1041                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1042                 ret = inet_set_ifa(dev, ifa);
1043                 break;
1044
1045         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1046                 ret = 0;
1047                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1048                         inet_del_ifa(in_dev, ifap, 0);
1049                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1050                         inet_insert_ifa(ifa);
1051                 }
1052                 break;
1053
1054         case SIOCSIFDSTADDR:    /* Set the destination address */
1055                 ret = 0;
1056                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1057                         break;
1058                 ret = -EINVAL;
1059                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1060                         break;
1061                 ret = 0;
1062                 inet_del_ifa(in_dev, ifap, 0);
1063                 ifa->ifa_address = sin->sin_addr.s_addr;
1064                 inet_insert_ifa(ifa);
1065                 break;
1066
1067         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1068
1069                 /*
1070                  *      The mask we set must be legal.
1071                  */
1072                 ret = -EINVAL;
1073                 if (bad_mask(sin->sin_addr.s_addr, 0))
1074                         break;
1075                 ret = 0;
1076                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1077                         __be32 old_mask = ifa->ifa_mask;
1078                         inet_del_ifa(in_dev, ifap, 0);
1079                         ifa->ifa_mask = sin->sin_addr.s_addr;
1080                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1081
1082                         /* See if current broadcast address matches
1083                          * with current netmask, then recalculate
1084                          * the broadcast address. Otherwise it's a
1085                          * funny address, so don't touch it since
1086                          * the user seems to know what (s)he's doing...
1087                          */
1088                         if ((dev->flags & IFF_BROADCAST) &&
1089                             (ifa->ifa_prefixlen < 31) &&
1090                             (ifa->ifa_broadcast ==
1091                              (ifa->ifa_local|~old_mask))) {
1092                                 ifa->ifa_broadcast = (ifa->ifa_local |
1093                                                       ~sin->sin_addr.s_addr);
1094                         }
1095                         inet_insert_ifa(ifa);
1096                 }
1097                 break;
1098         }
1099 done:
1100         rtnl_unlock();
1101 out:
1102         return ret;
1103 rarok:
1104         rtnl_unlock();
1105         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1106         goto out;
1107 }
1108
1109 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1110 {
1111         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1112         struct in_ifaddr *ifa;
1113         struct ifreq ifr;
1114         int done = 0;
1115
1116         if (!in_dev)
1117                 goto out;
1118
1119         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1120                 if (!buf) {
1121                         done += sizeof(ifr);
1122                         continue;
1123                 }
1124                 if (len < (int) sizeof(ifr))
1125                         break;
1126                 memset(&ifr, 0, sizeof(struct ifreq));
1127                 if (ifa->ifa_label)
1128                         strcpy(ifr.ifr_name, ifa->ifa_label);
1129                 else
1130                         strcpy(ifr.ifr_name, dev->name);
1131
1132                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1133                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1134                                                                 ifa->ifa_local;
1135
1136                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1137                         done = -EFAULT;
1138                         break;
1139                 }
1140                 buf  += sizeof(struct ifreq);
1141                 len  -= sizeof(struct ifreq);
1142                 done += sizeof(struct ifreq);
1143         }
1144 out:
1145         return done;
1146 }
1147
1148 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1149 {
1150         __be32 addr = 0;
1151         struct in_device *in_dev;
1152         struct net *net = dev_net(dev);
1153
1154         rcu_read_lock();
1155         in_dev = __in_dev_get_rcu(dev);
1156         if (!in_dev)
1157                 goto no_in_dev;
1158
1159         for_primary_ifa(in_dev) {
1160                 if (ifa->ifa_scope > scope)
1161                         continue;
1162                 if (!dst || inet_ifa_match(dst, ifa)) {
1163                         addr = ifa->ifa_local;
1164                         break;
1165                 }
1166                 if (!addr)
1167                         addr = ifa->ifa_local;
1168         } endfor_ifa(in_dev);
1169
1170         if (addr)
1171                 goto out_unlock;
1172 no_in_dev:
1173
1174         /* Not loopback addresses on loopback should be preferred
1175            in this case. It is importnat that lo is the first interface
1176            in dev_base list.
1177          */
1178         for_each_netdev_rcu(net, dev) {
1179                 in_dev = __in_dev_get_rcu(dev);
1180                 if (!in_dev)
1181                         continue;
1182
1183                 for_primary_ifa(in_dev) {
1184                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1185                             ifa->ifa_scope <= scope) {
1186                                 addr = ifa->ifa_local;
1187                                 goto out_unlock;
1188                         }
1189                 } endfor_ifa(in_dev);
1190         }
1191 out_unlock:
1192         rcu_read_unlock();
1193         return addr;
1194 }
1195 EXPORT_SYMBOL(inet_select_addr);
1196
1197 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1198                               __be32 local, int scope)
1199 {
1200         int same = 0;
1201         __be32 addr = 0;
1202
1203         for_ifa(in_dev) {
1204                 if (!addr &&
1205                     (local == ifa->ifa_local || !local) &&
1206                     ifa->ifa_scope <= scope) {
1207                         addr = ifa->ifa_local;
1208                         if (same)
1209                                 break;
1210                 }
1211                 if (!same) {
1212                         same = (!local || inet_ifa_match(local, ifa)) &&
1213                                 (!dst || inet_ifa_match(dst, ifa));
1214                         if (same && addr) {
1215                                 if (local || !dst)
1216                                         break;
1217                                 /* Is the selected addr into dst subnet? */
1218                                 if (inet_ifa_match(addr, ifa))
1219                                         break;
1220                                 /* No, then can we use new local src? */
1221                                 if (ifa->ifa_scope <= scope) {
1222                                         addr = ifa->ifa_local;
1223                                         break;
1224                                 }
1225                                 /* search for large dst subnet for addr */
1226                                 same = 0;
1227                         }
1228                 }
1229         } endfor_ifa(in_dev);
1230
1231         return same ? addr : 0;
1232 }
1233
1234 /*
1235  * Confirm that local IP address exists using wildcards:
1236  * - in_dev: only on this interface, 0=any interface
1237  * - dst: only in the same subnet as dst, 0=any dst
1238  * - local: address, 0=autoselect the local address
1239  * - scope: maximum allowed scope value for the local address
1240  */
1241 __be32 inet_confirm_addr(struct in_device *in_dev,
1242                          __be32 dst, __be32 local, int scope)
1243 {
1244         __be32 addr = 0;
1245         struct net_device *dev;
1246         struct net *net;
1247
1248         if (scope != RT_SCOPE_LINK)
1249                 return confirm_addr_indev(in_dev, dst, local, scope);
1250
1251         net = dev_net(in_dev->dev);
1252         rcu_read_lock();
1253         for_each_netdev_rcu(net, dev) {
1254                 in_dev = __in_dev_get_rcu(dev);
1255                 if (in_dev) {
1256                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1257                         if (addr)
1258                                 break;
1259                 }
1260         }
1261         rcu_read_unlock();
1262
1263         return addr;
1264 }
1265 EXPORT_SYMBOL(inet_confirm_addr);
1266
1267 /*
1268  *      Device notifier
1269  */
1270
1271 int register_inetaddr_notifier(struct notifier_block *nb)
1272 {
1273         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1274 }
1275 EXPORT_SYMBOL(register_inetaddr_notifier);
1276
1277 int unregister_inetaddr_notifier(struct notifier_block *nb)
1278 {
1279         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1280 }
1281 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1282
1283 /* Rename ifa_labels for a device name change. Make some effort to preserve
1284  * existing alias numbering and to create unique labels if possible.
1285 */
1286 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1287 {
1288         struct in_ifaddr *ifa;
1289         int named = 0;
1290
1291         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1292                 char old[IFNAMSIZ], *dot;
1293
1294                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1295                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1296                 if (named++ == 0)
1297                         goto skip;
1298                 dot = strchr(old, ':');
1299                 if (dot == NULL) {
1300                         sprintf(old, ":%d", named);
1301                         dot = old;
1302                 }
1303                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1304                         strcat(ifa->ifa_label, dot);
1305                 else
1306                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1307 skip:
1308                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1309         }
1310 }
1311
/* Returns true when @mtu is at least 68, the smallest MTU accepted here. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return !(mtu < min_mtu);
}
1316
1317 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1318                                         struct in_device *in_dev)
1319
1320 {
1321         struct in_ifaddr *ifa;
1322
1323         for (ifa = in_dev->ifa_list; ifa;
1324              ifa = ifa->ifa_next) {
1325                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1326                          ifa->ifa_local, dev,
1327                          ifa->ifa_local, NULL,
1328                          dev->dev_addr, NULL);
1329         }
1330 }
1331
1332 /* Called only under RTNL semaphore */
1333
1334 static int inetdev_event(struct notifier_block *this, unsigned long event,
1335                          void *ptr)
1336 {
1337         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1338         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1339
1340         ASSERT_RTNL();
1341
1342         if (!in_dev) {
1343                 if (event == NETDEV_REGISTER) {
1344                         in_dev = inetdev_init(dev);
1345                         if (!in_dev)
1346                                 return notifier_from_errno(-ENOMEM);
1347                         if (dev->flags & IFF_LOOPBACK) {
1348                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1349                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1350                         }
1351                 } else if (event == NETDEV_CHANGEMTU) {
1352                         /* Re-enabling IP */
1353                         if (inetdev_valid_mtu(dev->mtu))
1354                                 in_dev = inetdev_init(dev);
1355                 }
1356                 goto out;
1357         }
1358
1359         switch (event) {
1360         case NETDEV_REGISTER:
1361                 pr_debug("%s: bug\n", __func__);
1362                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1363                 break;
1364         case NETDEV_UP:
1365                 if (!inetdev_valid_mtu(dev->mtu))
1366                         break;
1367                 if (dev->flags & IFF_LOOPBACK) {
1368                         struct in_ifaddr *ifa = inet_alloc_ifa();
1369
1370                         if (ifa) {
1371                                 INIT_HLIST_NODE(&ifa->hash);
1372                                 ifa->ifa_local =
1373                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1374                                 ifa->ifa_prefixlen = 8;
1375                                 ifa->ifa_mask = inet_make_mask(8);
1376                                 in_dev_hold(in_dev);
1377                                 ifa->ifa_dev = in_dev;
1378                                 ifa->ifa_scope = RT_SCOPE_HOST;
1379                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1380                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1381                                                  INFINITY_LIFE_TIME);
1382                                 inet_insert_ifa(ifa);
1383                         }
1384                 }
1385                 ip_mc_up(in_dev);
1386                 /* fall through */
1387         case NETDEV_CHANGEADDR:
1388                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1389                         break;
1390                 /* fall through */
1391         case NETDEV_NOTIFY_PEERS:
1392                 /* Send gratuitous ARP to notify of link change */
1393                 inetdev_send_gratuitous_arp(dev, in_dev);
1394                 break;
1395         case NETDEV_DOWN:
1396                 ip_mc_down(in_dev);
1397                 break;
1398         case NETDEV_PRE_TYPE_CHANGE:
1399                 ip_mc_unmap(in_dev);
1400                 break;
1401         case NETDEV_POST_TYPE_CHANGE:
1402                 ip_mc_remap(in_dev);
1403                 break;
1404         case NETDEV_CHANGEMTU:
1405                 if (inetdev_valid_mtu(dev->mtu))
1406                         break;
1407                 /* disable IP when MTU is not enough */
1408         case NETDEV_UNREGISTER:
1409                 inetdev_destroy(in_dev);
1410                 break;
1411         case NETDEV_CHANGENAME:
1412                 /* Do not notify about label change, this event is
1413                  * not interesting to applications using netlink.
1414                  */
1415                 inetdev_changename(dev, in_dev);
1416
1417                 devinet_sysctl_unregister(in_dev);
1418                 devinet_sysctl_register(in_dev);
1419                 break;
1420         }
1421 out:
1422         return NOTIFY_DONE;
1423 }
1424
1425 static struct notifier_block ip_netdev_notifier = {
1426         .notifier_call = inetdev_event,
1427 };
1428
1429 static size_t inet_nlmsg_size(void)
1430 {
1431         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1432                + nla_total_size(4) /* IFA_ADDRESS */
1433                + nla_total_size(4) /* IFA_LOCAL */
1434                + nla_total_size(4) /* IFA_BROADCAST */
1435                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1436 }
1437
1438 static inline u32 cstamp_delta(unsigned long cstamp)
1439 {
1440         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1441 }
1442
1443 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1444                          unsigned long tstamp, u32 preferred, u32 valid)
1445 {
1446         struct ifa_cacheinfo ci;
1447
1448         ci.cstamp = cstamp_delta(cstamp);
1449         ci.tstamp = cstamp_delta(tstamp);
1450         ci.ifa_prefered = preferred;
1451         ci.ifa_valid = valid;
1452
1453         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1454 }
1455
1456 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1457                             u32 portid, u32 seq, int event, unsigned int flags)
1458 {
1459         struct ifaddrmsg *ifm;
1460         struct nlmsghdr  *nlh;
1461         u32 preferred, valid;
1462
1463         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1464         if (nlh == NULL)
1465                 return -EMSGSIZE;
1466
1467         ifm = nlmsg_data(nlh);
1468         ifm->ifa_family = AF_INET;
1469         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1470         ifm->ifa_flags = ifa->ifa_flags;
1471         ifm->ifa_scope = ifa->ifa_scope;
1472         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1473
1474         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1475                 preferred = ifa->ifa_preferred_lft;
1476                 valid = ifa->ifa_valid_lft;
1477                 if (preferred != INFINITY_LIFE_TIME) {
1478                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1479
1480                         if (preferred > tval)
1481                                 preferred -= tval;
1482                         else
1483                                 preferred = 0;
1484                         if (valid != INFINITY_LIFE_TIME) {
1485                                 if (valid > tval)
1486                                         valid -= tval;
1487                                 else
1488                                         valid = 0;
1489                         }
1490                 }
1491         } else {
1492                 preferred = INFINITY_LIFE_TIME;
1493                 valid = INFINITY_LIFE_TIME;
1494         }
1495         if ((ifa->ifa_address &&
1496              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1497             (ifa->ifa_local &&
1498              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1499             (ifa->ifa_broadcast &&
1500              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1501             (ifa->ifa_label[0] &&
1502              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1503             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1504                           preferred, valid))
1505                 goto nla_put_failure;
1506
1507         return nlmsg_end(skb, nlh);
1508
1509 nla_put_failure:
1510         nlmsg_cancel(skb, nlh);
1511         return -EMSGSIZE;
1512 }
1513
1514 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1515 {
1516         struct net *net = sock_net(skb->sk);
1517         int h, s_h;
1518         int idx, s_idx;
1519         int ip_idx, s_ip_idx;
1520         struct net_device *dev;
1521         struct in_device *in_dev;
1522         struct in_ifaddr *ifa;
1523         struct hlist_head *head;
1524
1525         s_h = cb->args[0];
1526         s_idx = idx = cb->args[1];
1527         s_ip_idx = ip_idx = cb->args[2];
1528
1529         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1530                 idx = 0;
1531                 head = &net->dev_index_head[h];
1532                 rcu_read_lock();
1533                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1534                           net->dev_base_seq;
1535                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1536                         if (idx < s_idx)
1537                                 goto cont;
1538                         if (h > s_h || idx > s_idx)
1539                                 s_ip_idx = 0;
1540                         in_dev = __in_dev_get_rcu(dev);
1541                         if (!in_dev)
1542                                 goto cont;
1543
1544                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1545                              ifa = ifa->ifa_next, ip_idx++) {
1546                                 if (ip_idx < s_ip_idx)
1547                                         continue;
1548                                 if (inet_fill_ifaddr(skb, ifa,
1549                                              NETLINK_CB(cb->skb).portid,
1550                                              cb->nlh->nlmsg_seq,
1551                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1552                                         rcu_read_unlock();
1553                                         goto done;
1554                                 }
1555                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1556                         }
1557 cont:
1558                         idx++;
1559                 }
1560                 rcu_read_unlock();
1561         }
1562
1563 done:
1564         cb->args[0] = h;
1565         cb->args[1] = idx;
1566         cb->args[2] = ip_idx;
1567
1568         return skb->len;
1569 }
1570
1571 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1572                       u32 portid)
1573 {
1574         struct sk_buff *skb;
1575         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1576         int err = -ENOBUFS;
1577         struct net *net;
1578
1579         net = dev_net(ifa->ifa_dev->dev);
1580         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1581         if (skb == NULL)
1582                 goto errout;
1583
1584         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1585         if (err < 0) {
1586                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1587                 WARN_ON(err == -EMSGSIZE);
1588                 kfree_skb(skb);
1589                 goto errout;
1590         }
1591         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1592         return;
1593 errout:
1594         if (err < 0)
1595                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1596 }
1597
1598 static size_t inet_get_link_af_size(const struct net_device *dev)
1599 {
1600         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1601
1602         if (!in_dev)
1603                 return 0;
1604
1605         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1606 }
1607
1608 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1609 {
1610         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611         struct nlattr *nla;
1612         int i;
1613
1614         if (!in_dev)
1615                 return -ENODATA;
1616
1617         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1618         if (nla == NULL)
1619                 return -EMSGSIZE;
1620
1621         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1622                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1623
1624         return 0;
1625 }
1626
1627 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1628         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1629 };
1630
1631 static int inet_validate_link_af(const struct net_device *dev,
1632                                  const struct nlattr *nla)
1633 {
1634         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1635         int err, rem;
1636
1637         if (dev && !__in_dev_get_rtnl(dev))
1638                 return -EAFNOSUPPORT;
1639
1640         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1641         if (err < 0)
1642                 return err;
1643
1644         if (tb[IFLA_INET_CONF]) {
1645                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1646                         int cfgid = nla_type(a);
1647
1648                         if (nla_len(a) < 4)
1649                                 return -EINVAL;
1650
1651                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1652                                 return -EINVAL;
1653                 }
1654         }
1655
1656         return 0;
1657 }
1658
1659 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1660 {
1661         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1662         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1663         int rem;
1664
1665         if (!in_dev)
1666                 return -EAFNOSUPPORT;
1667
1668         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1669                 BUG();
1670
1671         if (tb[IFLA_INET_CONF]) {
1672                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1673                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1674         }
1675
1676         return 0;
1677 }
1678
1679 static int inet_netconf_msgsize_devconf(int type)
1680 {
1681         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1682                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1683
1684         /* type -1 is used for ALL */
1685         if (type == -1 || type == NETCONFA_FORWARDING)
1686                 size += nla_total_size(4);
1687         if (type == -1 || type == NETCONFA_RP_FILTER)
1688                 size += nla_total_size(4);
1689         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1690                 size += nla_total_size(4);
1691
1692         return size;
1693 }
1694
1695 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1696                                      struct ipv4_devconf *devconf, u32 portid,
1697                                      u32 seq, int event, unsigned int flags,
1698                                      int type)
1699 {
1700         struct nlmsghdr  *nlh;
1701         struct netconfmsg *ncm;
1702
1703         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1704                         flags);
1705         if (nlh == NULL)
1706                 return -EMSGSIZE;
1707
1708         ncm = nlmsg_data(nlh);
1709         ncm->ncm_family = AF_INET;
1710
1711         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1712                 goto nla_put_failure;
1713
1714         /* type -1 is used for ALL */
1715         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1716             nla_put_s32(skb, NETCONFA_FORWARDING,
1717                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1718                 goto nla_put_failure;
1719         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1720             nla_put_s32(skb, NETCONFA_RP_FILTER,
1721                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1722                 goto nla_put_failure;
1723         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1724             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1725                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1726                 goto nla_put_failure;
1727
1728         return nlmsg_end(skb, nlh);
1729
1730 nla_put_failure:
1731         nlmsg_cancel(skb, nlh);
1732         return -EMSGSIZE;
1733 }
1734
1735 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1736                                  struct ipv4_devconf *devconf)
1737 {
1738         struct sk_buff *skb;
1739         int err = -ENOBUFS;
1740
1741         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1742         if (skb == NULL)
1743                 goto errout;
1744
1745         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1746                                         RTM_NEWNETCONF, 0, type);
1747         if (err < 0) {
1748                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1749                 WARN_ON(err == -EMSGSIZE);
1750                 kfree_skb(skb);
1751                 goto errout;
1752         }
1753         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1754         return;
1755 errout:
1756         if (err < 0)
1757                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1758 }
1759
1760 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1761         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1762         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1763         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1764 };
1765
1766 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1767                                     struct nlmsghdr *nlh)
1768 {
1769         struct net *net = sock_net(in_skb->sk);
1770         struct nlattr *tb[NETCONFA_MAX+1];
1771         struct netconfmsg *ncm;
1772         struct sk_buff *skb;
1773         struct ipv4_devconf *devconf;
1774         struct in_device *in_dev;
1775         struct net_device *dev;
1776         int ifindex;
1777         int err;
1778
1779         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1780                           devconf_ipv4_policy);
1781         if (err < 0)
1782                 goto errout;
1783
1784         err = EINVAL;
1785         if (!tb[NETCONFA_IFINDEX])
1786                 goto errout;
1787
1788         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1789         switch (ifindex) {
1790         case NETCONFA_IFINDEX_ALL:
1791                 devconf = net->ipv4.devconf_all;
1792                 break;
1793         case NETCONFA_IFINDEX_DEFAULT:
1794                 devconf = net->ipv4.devconf_dflt;
1795                 break;
1796         default:
1797                 dev = __dev_get_by_index(net, ifindex);
1798                 if (dev == NULL)
1799                         goto errout;
1800                 in_dev = __in_dev_get_rtnl(dev);
1801                 if (in_dev == NULL)
1802                         goto errout;
1803                 devconf = &in_dev->cnf;
1804                 break;
1805         }
1806
1807         err = -ENOBUFS;
1808         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1809         if (skb == NULL)
1810                 goto errout;
1811
1812         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1813                                         NETLINK_CB(in_skb).portid,
1814                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1815                                         -1);
1816         if (err < 0) {
1817                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1818                 WARN_ON(err == -EMSGSIZE);
1819                 kfree_skb(skb);
1820                 goto errout;
1821         }
1822         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1823 errout:
1824         return err;
1825 }
1826
1827 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1828                                      struct netlink_callback *cb)
1829 {
1830         struct net *net = sock_net(skb->sk);
1831         int h, s_h;
1832         int idx, s_idx;
1833         struct net_device *dev;
1834         struct in_device *in_dev;
1835         struct hlist_head *head;
1836
1837         s_h = cb->args[0];
1838         s_idx = idx = cb->args[1];
1839
1840         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1841                 idx = 0;
1842                 head = &net->dev_index_head[h];
1843                 rcu_read_lock();
1844                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1845                           net->dev_base_seq;
1846                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1847                         if (idx < s_idx)
1848                                 goto cont;
1849                         in_dev = __in_dev_get_rcu(dev);
1850                         if (!in_dev)
1851                                 goto cont;
1852
1853                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1854                                                       &in_dev->cnf,
1855                                                       NETLINK_CB(cb->skb).portid,
1856                                                       cb->nlh->nlmsg_seq,
1857                                                       RTM_NEWNETCONF,
1858                                                       NLM_F_MULTI,
1859                                                       -1) <= 0) {
1860                                 rcu_read_unlock();
1861                                 goto done;
1862                         }
1863                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1864 cont:
1865                         idx++;
1866                 }
1867                 rcu_read_unlock();
1868         }
1869         if (h == NETDEV_HASHENTRIES) {
1870                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1871                                               net->ipv4.devconf_all,
1872                                               NETLINK_CB(cb->skb).portid,
1873                                               cb->nlh->nlmsg_seq,
1874                                               RTM_NEWNETCONF, NLM_F_MULTI,
1875                                               -1) <= 0)
1876                         goto done;
1877                 else
1878                         h++;
1879         }
1880         if (h == NETDEV_HASHENTRIES + 1) {
1881                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1882                                               net->ipv4.devconf_dflt,
1883                                               NETLINK_CB(cb->skb).portid,
1884                                               cb->nlh->nlmsg_seq,
1885                                               RTM_NEWNETCONF, NLM_F_MULTI,
1886                                               -1) <= 0)
1887                         goto done;
1888                 else
1889                         h++;
1890         }
1891 done:
1892         cb->args[0] = h;
1893         cb->args[1] = idx;
1894
1895         return skb->len;
1896 }
1897
1898 #ifdef CONFIG_SYSCTL
1899
1900 static void devinet_copy_dflt_conf(struct net *net, int i)
1901 {
1902         struct net_device *dev;
1903
1904         rcu_read_lock();
1905         for_each_netdev_rcu(net, dev) {
1906                 struct in_device *in_dev;
1907
1908                 in_dev = __in_dev_get_rcu(dev);
1909                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1910                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1911         }
1912         rcu_read_unlock();
1913 }
1914
1915 /* called with RTNL locked */
1916 static void inet_forward_change(struct net *net)
1917 {
1918         struct net_device *dev;
1919         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1920
1921         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1922         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1923         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1924                                     NETCONFA_IFINDEX_ALL,
1925                                     net->ipv4.devconf_all);
1926         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1927                                     NETCONFA_IFINDEX_DEFAULT,
1928                                     net->ipv4.devconf_dflt);
1929
1930         for_each_netdev(net, dev) {
1931                 struct in_device *in_dev;
1932                 if (on)
1933                         dev_disable_lro(dev);
1934                 rcu_read_lock();
1935                 in_dev = __in_dev_get_rcu(dev);
1936                 if (in_dev) {
1937                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1938                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1939                                                     dev->ifindex, &in_dev->cnf);
1940                 }
1941                 rcu_read_unlock();
1942         }
1943 }
1944
1945 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1946                              void __user *buffer,
1947                              size_t *lenp, loff_t *ppos)
1948 {
1949         int old_value = *(int *)ctl->data;
1950         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1951         int new_value = *(int *)ctl->data;
1952
1953         if (write) {
1954                 struct ipv4_devconf *cnf = ctl->extra1;
1955                 struct net *net = ctl->extra2;
1956                 int i = (int *)ctl->data - cnf->data;
1957
1958                 set_bit(i, cnf->state);
1959
1960                 if (cnf == net->ipv4.devconf_dflt)
1961                         devinet_copy_dflt_conf(net, i);
1962                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1963                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1964                         if ((new_value == 0) && (old_value != 0))
1965                                 rt_cache_flush(net);
1966                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1967                     new_value != old_value) {
1968                         int ifindex;
1969
1970                         if (cnf == net->ipv4.devconf_dflt)
1971                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1972                         else if (cnf == net->ipv4.devconf_all)
1973                                 ifindex = NETCONFA_IFINDEX_ALL;
1974                         else {
1975                                 struct in_device *idev =
1976                                         container_of(cnf, struct in_device,
1977                                                      cnf);
1978                                 ifindex = idev->dev->ifindex;
1979                         }
1980                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1981                                                     ifindex, cnf);
1982                 }
1983         }
1984
1985         return ret;
1986 }
1987
1988 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1989                                   void __user *buffer,
1990                                   size_t *lenp, loff_t *ppos)
1991 {
1992         int *valp = ctl->data;
1993         int val = *valp;
1994         loff_t pos = *ppos;
1995         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1996
1997         if (write && *valp != val) {
1998                 struct net *net = ctl->extra2;
1999
2000                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2001                         if (!rtnl_trylock()) {
2002                                 /* Restore the original values before restarting */
2003                                 *valp = val;
2004                                 *ppos = pos;
2005                                 return restart_syscall();
2006                         }
2007                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2008                                 inet_forward_change(net);
2009                         } else {
2010                                 struct ipv4_devconf *cnf = ctl->extra1;
2011                                 struct in_device *idev =
2012                                         container_of(cnf, struct in_device, cnf);
2013                                 if (*valp)
2014                                         dev_disable_lro(idev->dev);
2015                                 inet_netconf_notify_devconf(net,
2016                                                             NETCONFA_FORWARDING,
2017                                                             idev->dev->ifindex,
2018                                                             cnf);
2019                         }
2020                         rtnl_unlock();
2021                         rt_cache_flush(net);
2022                 } else
2023                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2024                                                     NETCONFA_IFINDEX_DEFAULT,
2025                                                     net->ipv4.devconf_dflt);
2026         }
2027
2028         return ret;
2029 }
2030
2031 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2032                                 void __user *buffer,
2033                                 size_t *lenp, loff_t *ppos)
2034 {
2035         int *valp = ctl->data;
2036         int val = *valp;
2037         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2038         struct net *net = ctl->extra2;
2039
2040         if (write && *valp != val)
2041                 rt_cache_flush(net);
2042
2043         return ret;
2044 }
2045
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf
 * and .extra1 at ipv4_devconf itself; __devinet_sysctl_register() rebases
 * both in each copy of the template.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2068
2069 static struct devinet_sysctl_table {
2070         struct ctl_table_header *sysctl_header;
2071         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2072 } devinet_sysctl = {
2073         .devinet_vars = {
2074                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2075                                              devinet_sysctl_forward),
2076                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2077
2078                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2079                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2080                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2081                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2082                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2083                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2084                                         "accept_source_route"),
2085                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2086                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2087                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2088                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2089                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2090                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2091                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2092                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2093                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2094                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2095                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2096                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2097                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2098
2099                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2100                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2101                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2102                                               "force_igmp_version"),
2103                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2104                                               "promote_secondaries"),
2105                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2106                                               "route_localnet"),
2107         },
2108 };
2109
2110 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2111                                         struct ipv4_devconf *p)
2112 {
2113         int i;
2114         struct devinet_sysctl_table *t;
2115         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2116
2117         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2118         if (!t)
2119                 goto out;
2120
2121         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2122                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2123                 t->devinet_vars[i].extra1 = p;
2124                 t->devinet_vars[i].extra2 = net;
2125         }
2126
2127         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2128
2129         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2130         if (!t->sysctl_header)
2131                 goto free;
2132
2133         p->sysctl = t;
2134         return 0;
2135
2136 free:
2137         kfree(t);
2138 out:
2139         return -ENOBUFS;
2140 }
2141
2142 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2143 {
2144         struct devinet_sysctl_table *t = cnf->sysctl;
2145
2146         if (t == NULL)
2147                 return;
2148
2149         cnf->sysctl = NULL;
2150         unregister_net_sysctl_table(t->sysctl_header);
2151         kfree(t);
2152 }
2153
2154 static void devinet_sysctl_register(struct in_device *idev)
2155 {
2156         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2157         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2158                                         &idev->cnf);
2159 }
2160
2161 static void devinet_sysctl_unregister(struct in_device *idev)
2162 {
2163         __devinet_sysctl_unregister(&idev->cnf);
2164         neigh_sysctl_unregister(idev->arp_parms);
2165 }
2166
2167 static struct ctl_table ctl_forward_entry[] = {
2168         {
2169                 .procname       = "ip_forward",
2170                 .data           = &ipv4_devconf.data[
2171                                         IPV4_DEVCONF_FORWARDING - 1],
2172                 .maxlen         = sizeof(int),
2173                 .mode           = 0644,
2174                 .proc_handler   = devinet_sysctl_forward,
2175                 .extra1         = &ipv4_devconf,
2176                 .extra2         = &init_net,
2177         },
2178         { },
2179 };
2180 #endif
2181
2182 static __net_init int devinet_init_net(struct net *net)
2183 {
2184         int err;
2185         struct ipv4_devconf *all, *dflt;
2186 #ifdef CONFIG_SYSCTL
2187         struct ctl_table *tbl = ctl_forward_entry;
2188         struct ctl_table_header *forw_hdr;
2189 #endif
2190
2191         err = -ENOMEM;
2192         all = &ipv4_devconf;
2193         dflt = &ipv4_devconf_dflt;
2194
2195         if (!net_eq(net, &init_net)) {
2196                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2197                 if (all == NULL)
2198                         goto err_alloc_all;
2199
2200                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2201                 if (dflt == NULL)
2202                         goto err_alloc_dflt;
2203
2204 #ifdef CONFIG_SYSCTL
2205                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2206                 if (tbl == NULL)
2207                         goto err_alloc_ctl;
2208
2209                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2210                 tbl[0].extra1 = all;
2211                 tbl[0].extra2 = net;
2212 #endif
2213         }
2214
2215 #ifdef CONFIG_SYSCTL
2216         err = __devinet_sysctl_register(net, "all", all);
2217         if (err < 0)
2218                 goto err_reg_all;
2219
2220         err = __devinet_sysctl_register(net, "default", dflt);
2221         if (err < 0)
2222                 goto err_reg_dflt;
2223
2224         err = -ENOMEM;
2225         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2226         if (forw_hdr == NULL)
2227                 goto err_reg_ctl;
2228         net->ipv4.forw_hdr = forw_hdr;
2229 #endif
2230
2231         net->ipv4.devconf_all = all;
2232         net->ipv4.devconf_dflt = dflt;
2233         return 0;
2234
2235 #ifdef CONFIG_SYSCTL
2236 err_reg_ctl:
2237         __devinet_sysctl_unregister(dflt);
2238 err_reg_dflt:
2239         __devinet_sysctl_unregister(all);
2240 err_reg_all:
2241         if (tbl != ctl_forward_entry)
2242                 kfree(tbl);
2243 err_alloc_ctl:
2244 #endif
2245         if (dflt != &ipv4_devconf_dflt)
2246                 kfree(dflt);
2247 err_alloc_dflt:
2248         if (all != &ipv4_devconf)
2249                 kfree(all);
2250 err_alloc_all:
2251         return err;
2252 }
2253
2254 static __net_exit void devinet_exit_net(struct net *net)
2255 {
2256 #ifdef CONFIG_SYSCTL
2257         struct ctl_table *tbl;
2258
2259         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2260         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2261         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2262         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2263         kfree(tbl);
2264 #endif
2265         kfree(net->ipv4.devconf_dflt);
2266         kfree(net->ipv4.devconf_all);
2267 }
2268
2269 static __net_initdata struct pernet_operations devinet_ops = {
2270         .init = devinet_init_net,
2271         .exit = devinet_exit_net,
2272 };
2273
2274 static struct rtnl_af_ops inet_af_ops = {
2275         .family           = AF_INET,
2276         .fill_link_af     = inet_fill_link_af,
2277         .get_link_af_size = inet_get_link_af_size,
2278         .validate_link_af = inet_validate_link_af,
2279         .set_link_af      = inet_set_link_af,
2280 };
2281
2282 void __init devinet_init(void)
2283 {
2284         int i;
2285
2286         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2287                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2288
2289         register_pernet_subsys(&devinet_ops);
2290
2291         register_gifconf(PF_INET, inet_gifconf);
2292         register_netdevice_notifier(&ip_netdev_notifier);
2293
2294         schedule_delayed_work(&check_lifetime_work, 0);
2295
2296         rtnl_af_register(&inet_af_ops);
2297
2298         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2299         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2300         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2301         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2302                       inet_netconf_dump_devconf, NULL);
2303 }
2304