]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blob - net/ipv4/ipip.c
618bde867ac11650c802423b13d9cb9804e6aca1
[can-eth-gw-linux.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* 16-slot hash: fold the two low nibbles of the (big-endian) address. */
120 #define HASH_SIZE  16
121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122
/*
 * Per-network-namespace tunnel state.  Tunnels are kept in four tables
 * indexed by which endpoint addresses are configured; tunnels[] lets
 * code pick a table by priority (bit1 = remote set, bit0 = local set).
 */
123 static int ipip_net_id __read_mostly;
124 struct ipip_net {
125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; /* remote and local set */
126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];   /* remote only */
127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];   /* local only */
128         struct ip_tunnel __rcu *tunnels_wc[1];          /* wildcard (fallback) */
129         struct ip_tunnel __rcu **tunnels[4];            /* the above, by prio */
130
131         struct net_device *fb_tunnel_dev;               /* the "tunl0" fallback device */
132 };
133
134 static int ipip_tunnel_init(struct net_device *dev);
135 static void ipip_tunnel_setup(struct net_device *dev);
136 static void ipip_dev_free(struct net_device *dev);
137
138 /*
139  * Locking : hash tables are protected by RCU and RTNL
140  */
141
/* Walks a hash chain under RCU; expects a local 'struct ip_tunnel *t'. */
142 #define for_each_ip_tunnel_rcu(start) \
143         for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144
145 /* often modified stats are per cpu, other are shared (netdev->stats) */
146 struct pcpu_tstats {
147         u64     rx_packets;
148         u64     rx_bytes;
149         u64     tx_packets;
150         u64     tx_bytes;
151         struct u64_stats_sync   syncp;  /* protects 64-bit reads on 32-bit hosts */
152 };
153
153
/*
 * ipip_get_stats64 - fill @tot with the device's 64-bit statistics.
 *
 * Packet/byte counters are kept per cpu and summed here under the
 * u64_stats seqcount; error counters live in the shared dev->stats
 * and are copied over directly.  Returns @tot.
 */
154 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
155                                                   struct rtnl_link_stats64 *tot)
156 {
157         int i;
158
159         for_each_possible_cpu(i) {
160                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
161                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
162                 unsigned int start;
163
                    /* retry the snapshot if a writer updated the counters meanwhile */
164                 do {
165                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
166                         rx_packets = tstats->rx_packets;
167                         tx_packets = tstats->tx_packets;
168                         rx_bytes = tstats->rx_bytes;
169                         tx_bytes = tstats->tx_bytes;
170                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
171
172                 tot->rx_packets += rx_packets;
173                 tot->tx_packets += tx_packets;
174                 tot->rx_bytes   += rx_bytes;
175                 tot->tx_bytes   += tx_bytes;
176         }
177
178         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
179         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
180         tot->tx_dropped = dev->stats.tx_dropped;
181         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
182         tot->tx_errors = dev->stats.tx_errors;
183         tot->collisions = dev->stats.collisions;
184
185         return tot;
186 }
187
/*
 * ipip_tunnel_lookup - find an UP tunnel for a packet's outer addresses.
 *
 * Tables are searched most-specific first: (remote, local) pair, then
 * remote-only, then local-only, finally the single wildcard slot.
 * Runs under RCU; returns NULL when no matching tunnel is up.
 */
188 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
189                 __be32 remote, __be32 local)
190 {
191         unsigned int h0 = HASH(remote);
192         unsigned int h1 = HASH(local);
193         struct ip_tunnel *t;
194         struct ipip_net *ipn = net_generic(net, ipip_net_id);
195
196         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
197                 if (local == t->parms.iph.saddr &&
198                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
199                         return t;
200
201         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
202                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
203                         return t;
204
205         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
206                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
207                         return t;
208
209         t = rcu_dereference(ipn->tunnels_wc[0]);
210         if (t && (t->dev->flags&IFF_UP))
211                 return t;
212         return NULL;
213 }
214
/*
 * __ipip_bucket - return the hash-chain head for a set of tunnel parms.
 *
 * prio selects the table (bit1: remote address set, bit0: local address
 * set); h is the XOR of the hashes of whichever endpoints are present,
 * matching the h0 ^ h1 probe order used by ipip_tunnel_lookup().
 */
215 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
216                 struct ip_tunnel_parm *parms)
217 {
218         __be32 remote = parms->iph.daddr;
219         __be32 local = parms->iph.saddr;
220         unsigned int h = 0;
221         int prio = 0;
222
223         if (remote) {
224                 prio |= 2;
225                 h ^= HASH(remote);
226         }
227         if (local) {
228                 prio |= 1;
229                 h ^= HASH(local);
230         }
231         return &ipn->tunnels[prio][h];
232 }
233
/* Convenience wrapper: bucket for an existing tunnel's parameters. */
234 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
235                 struct ip_tunnel *t)
236 {
237         return __ipip_bucket(ipn, &t->parms);
238 }
239
/*
 * ipip_tunnel_unlink - remove @t from its hash chain.
 * Caller holds RTNL; readers may still be traversing the chain under
 * RCU, hence the rcu_assign_pointer() splice-out.
 */
240 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
241 {
242         struct ip_tunnel __rcu **tp;
243         struct ip_tunnel *iter;
244
245         for (tp = ipip_bucket(ipn, t);
246              (iter = rtnl_dereference(*tp)) != NULL;
247              tp = &iter->next) {
248                 if (t == iter) {
249                         rcu_assign_pointer(*tp, t->next);
250                         break;
251                 }
252         }
253 }
254
/*
 * ipip_tunnel_link - insert @t at the head of its hash chain.
 * t->next must be published before *tp so concurrent RCU readers
 * never see a half-linked entry.
 */
255 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
256 {
257         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
258
259         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
260         rcu_assign_pointer(*tp, t);
261 }
262
/*
 * ipip_tunnel_locate - find a tunnel matching @parms, optionally creating it.
 *
 * With @create == 0 this is a pure lookup (RTNL held).  Otherwise a new
 * net_device is allocated (named from parms->name, or "tunl%d" if empty),
 * initialised, registered and linked into the hash tables.  Returns the
 * tunnel or NULL on lookup miss / allocation or registration failure.
 */
263 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
264                 struct ip_tunnel_parm *parms, int create)
265 {
266         __be32 remote = parms->iph.daddr;
267         __be32 local = parms->iph.saddr;
268         struct ip_tunnel *t, *nt;
269         struct ip_tunnel __rcu **tp;
270         struct net_device *dev;
271         char name[IFNAMSIZ];
272         struct ipip_net *ipn = net_generic(net, ipip_net_id);
273
274         for (tp = __ipip_bucket(ipn, parms);
275                  (t = rtnl_dereference(*tp)) != NULL;
276                  tp = &t->next) {
277                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
278                         return t;
279         }
280         if (!create)
281                 return NULL;
282
283         if (parms->name[0])
284                 strlcpy(name, parms->name, IFNAMSIZ);
285         else
286                 strcpy(name, "tunl%d");
287
288         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
289         if (dev == NULL)
290                 return NULL;
291
292         dev_net_set(dev, net);
293
294         nt = netdev_priv(dev);
295         nt->parms = *parms;
296
297         if (ipip_tunnel_init(dev) < 0)
298                 goto failed_free;
299
300         if (register_netdevice(dev) < 0)
301                 goto failed_free;
302
            /* register_netdevice() resolved any "%d" in the name; copy it back */
303         strcpy(nt->parms.name, dev->name);
304
            /* reference dropped in ipip_tunnel_uninit() */
305         dev_hold(dev);
306         ipip_tunnel_link(ipn, nt);
307         return nt;
308
309 failed_free:
310         ipip_dev_free(dev);
311         return NULL;
312 }
313
314 /* called with RTNL */
/*
 * ipip_tunnel_uninit - ndo_uninit: unhash the tunnel and drop the
 * reference taken at creation.  The fallback device only occupies the
 * wildcard slot, so it is cleared directly instead of being unlinked.
 */
315 static void ipip_tunnel_uninit(struct net_device *dev)
316 {
317         struct net *net = dev_net(dev);
318         struct ipip_net *ipn = net_generic(net, ipip_net_id);
319
320         if (dev == ipn->fb_tunnel_dev)
321                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
322         else
323                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
324         dev_put(dev);
325 }
326
/*
 * ipip_err - ICMP error handler for IPIP packets we transmitted.
 *
 * @skb holds the ICMP message; its payload starts with the outer IPv4
 * header of the offending tunnel packet.  Relevant errors (unreachable,
 * TTL exceeded, redirect, frag-needed) are mapped onto the tunnel found
 * by reversing the addresses, updating the route's PMTU / redirect
 * state or rate-tracking the error via err_count/err_time so that
 * ipip_tunnel_xmit() can propagate a link failure.
 * Returns 0, or -ENOENT when no tunnel matches.
 */
327 static int ipip_err(struct sk_buff *skb, u32 info)
328 {
329
330 /* All the routers (except for Linux) return only
331    8 bytes of packet payload. It means, that precise relaying of
332    ICMP in the real Internet is absolutely infeasible.
333  */
334         const struct iphdr *iph = (const struct iphdr *)skb->data;
335         const int type = icmp_hdr(skb)->type;
336         const int code = icmp_hdr(skb)->code;
337         struct ip_tunnel *t;
338         int err;
339
            /* First, filter out ICMP types we do not react to at all. */
340         switch (type) {
341         default:
342         case ICMP_PARAMETERPROB:
343                 return 0;
344
345         case ICMP_DEST_UNREACH:
346                 switch (code) {
347                 case ICMP_SR_FAILED:
348                 case ICMP_PORT_UNREACH:
349                         /* Impossible event. */
350                         return 0;
351                 default:
352                         /* All others are translated to HOST_UNREACH.
353                            rfc2003 contains "deep thoughts" about NET_UNREACH,
354                            I believe they are just ether pollution. --ANK
355                          */
356                         break;
357                 }
358                 break;
359         case ICMP_TIME_EXCEEDED:
360                 if (code != ICMP_EXC_TTL)
361                         return 0;
362                 break;
363         case ICMP_REDIRECT:
364                 break;
365         }
366
            /* We sent the embedded packet, so the lookup swaps src/dst. */
367         err = -ENOENT;
368         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
369         if (t == NULL)
370                 goto out;
371
372         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
373                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
373                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
375                 err = 0;
376                 goto out;
377         }
378
379         if (type == ICMP_REDIRECT) {
380                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
381                               IPPROTO_IPIP, 0);
382                 err = 0;
383                 goto out;
384         }
385
            /* NBMA tunnels (no fixed remote) cannot attribute the error. */
386         if (t->parms.iph.daddr == 0)
387                 goto out;
388
389         err = 0;
            /* TTL inherited from inner packet: TTL-exceeded is expected. */
390         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
391                 goto out;
392
            /* Count bursts of errors within IPTUNNEL_ERR_TIMEO for xmit. */
393         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
394                 t->err_count++;
395         else
396                 t->err_count = 1;
397         t->err_time = jiffies;
398 out:
399
400         return err;
401 }
402
403 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
404                                         struct sk_buff *skb)
405 {
406         struct iphdr *inner_iph = ip_hdr(skb);
407
408         if (INET_ECN_is_ce(outer_iph->tos))
409                 IP_ECN_set_ce(inner_iph);
410 }
411
/*
 * ipip_rcv - receive path: decapsulate an IPIP packet.
 *
 * Looks up the tunnel by the outer source/destination addresses; if one
 * exists, strips the outer header state (the inner IP header becomes the
 * network header), bumps per-cpu RX stats, propagates ECN and hands the
 * inner packet back to the stack via netif_rx().
 * Returns 0 when consumed, -1 to let other handlers see the packet.
 */
412 static int ipip_rcv(struct sk_buff *skb)
413 {
414         struct ip_tunnel *tunnel;
415         const struct iphdr *iph = ip_hdr(skb);
416
417         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418         if (tunnel != NULL) {
419                 struct pcpu_tstats *tstats;
420
                    /* IPsec policy check on the outer packet; drop on failure */
421                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
422                         kfree_skb(skb);
423                         return 0;
424                 }
425
426                 secpath_reset(skb);
427
                    /* the outer header becomes "link layer"; inner IP is next */
428                 skb->mac_header = skb->network_header;
429                 skb_reset_network_header(skb);
430                 skb->protocol = htons(ETH_P_IP);
431                 skb->pkt_type = PACKET_HOST;
432
433                 tstats = this_cpu_ptr(tunnel->dev->tstats);
434                 u64_stats_update_begin(&tstats->syncp);
435                 tstats->rx_packets++;
436                 tstats->rx_bytes += skb->len;
437                 u64_stats_update_end(&tstats->syncp);
438
439                 __skb_tunnel_rx(skb, tunnel->dev);
440
441                 ipip_ecn_decapsulate(iph, skb);
442
443                 netif_rx(skb);
444                 return 0;
445         }
446
447         return -1;
448 }
449
450 /*
451  *      This function assumes it is being called from dev_queue_xmit()
452  *      and that skb is filled properly by that function.
453  */
454
/*
 * ipip_tunnel_xmit - transmit path: encapsulate an IPv4 packet in IPv4.
 *
 * Routes the outer packet to the tunnel's remote endpoint (or, for an
 * NBMA tunnel with no configured remote, to the inner next hop), applies
 * DF/PMTU handling, makes headroom for the outer header, then builds and
 * pushes the outer IPv4 header and hands the skb to the IP layer via
 * __IPTUNNEL_XMIT.  Drops (with stats) on any error; always returns
 * NETDEV_TX_OK as the skb is consumed either way.
 */
455 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
456 {
457         struct ip_tunnel *tunnel = netdev_priv(dev);
458         struct pcpu_tstats *tstats;
459         const struct iphdr  *tiph = &tunnel->parms.iph;
460         u8     tos = tunnel->parms.iph.tos;
461         __be16 df = tiph->frag_off;
462         struct rtable *rt;                      /* Route to the other host */
463         struct net_device *tdev;                /* Device to other host */
464         const struct iphdr  *old_iph = ip_hdr(skb);
465         struct iphdr  *iph;                     /* Our new IP header */
466         unsigned int max_headroom;              /* The extra header space needed */
467         __be32 dst = tiph->daddr;
468         struct flowi4 fl4;
469         int    mtu;
470
            /* only IPv4 payloads can be IPIP-encapsulated */
471         if (skb->protocol != htons(ETH_P_IP))
472                 goto tx_error;
473
            /* low bit of the configured TOS means "inherit from inner header" */
474         if (tos & 1)
475                 tos = old_iph->tos;
476
477         if (!dst) {
478                 /* NBMA tunnel */
479                 if ((rt = skb_rtable(skb)) == NULL) {
480                         dev->stats.tx_fifo_errors++;
481                         goto tx_error;
482                 }
483                 dst = rt_nexthop(rt, old_iph->daddr);
484         }
485
486         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
487                                    dst, tiph->saddr,
488                                    0, 0,
489                                    IPPROTO_IPIP, RT_TOS(tos),
490                                    tunnel->parms.link);
491         if (IS_ERR(rt)) {
492                 dev->stats.tx_carrier_errors++;
493                 goto tx_error_icmp;
494         }
495         tdev = rt->dst.dev;
496
            /* route loops back into this tunnel: would recurse forever */
497         if (tdev == dev) {
498                 ip_rt_put(rt);
499                 dev->stats.collisions++;
500                 goto tx_error;
501         }
502
            /* honour DF from either the tunnel config or the inner packet */
503         df |= old_iph->frag_off & htons(IP_DF);
504
505         if (df) {
506                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
507
                    /* 68 is the minimum IPv4 MTU */
508                 if (mtu < 68) {
509                         dev->stats.collisions++;
510                         ip_rt_put(rt);
511                         goto tx_error;
512                 }
513
514                 if (skb_dst(skb))
515                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
516
                    /* DF set and packet too big: bounce an ICMP FRAG_NEEDED */
517                 if ((old_iph->frag_off & htons(IP_DF)) &&
518                     mtu < ntohs(old_iph->tot_len)) {
519                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
520                                   htonl(mtu));
521                         ip_rt_put(rt);
522                         goto tx_error;
523                 }
524         }
525
            /* replay recent ICMP errors (recorded by ipip_err) to the sender */
526         if (tunnel->err_count > 0) {
527                 if (time_before(jiffies,
528                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
529                         tunnel->err_count--;
530                         dst_link_failure(skb);
531                 } else
532                         tunnel->err_count = 0;
533         }
534
535         /*
536          * Okay, now see if we can stuff it in the buffer as-is.
537          */
538         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
539
540         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
541             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
542                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
543                 if (!new_skb) {
544                         ip_rt_put(rt);
545                         dev->stats.tx_dropped++;
546                         dev_kfree_skb(skb);
547                         return NETDEV_TX_OK;
548                 }
549                 if (skb->sk)
550                         skb_set_owner_w(new_skb, skb->sk);
551                 dev_kfree_skb(skb);
552                 skb = new_skb;
                    /* headers may have moved with the reallocated skb */
553                 old_iph = ip_hdr(skb);
554         }
555
556         skb->transport_header = skb->network_header;
557         skb_push(skb, sizeof(struct iphdr));
558         skb_reset_network_header(skb);
559         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
560         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
561                               IPSKB_REROUTED);
562         skb_dst_drop(skb);
563         skb_dst_set(skb, &rt->dst);
564
565         /*
566          *      Push down and install the IPIP header.
567          */
568
569         iph                     =       ip_hdr(skb);
570         iph->version            =       4;
571         iph->ihl                =       sizeof(struct iphdr)>>2;
572         iph->frag_off           =       df;
573         iph->protocol           =       IPPROTO_IPIP;
574         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
575         iph->daddr              =       fl4.daddr;
576         iph->saddr              =       fl4.saddr;
577
            /* TTL 0 in the config means "inherit from inner header" */
578         if ((iph->ttl = tiph->ttl) == 0)
579                 iph->ttl        =       old_iph->ttl;
580
581         nf_reset(skb);
582         tstats = this_cpu_ptr(dev->tstats);
583         __IPTUNNEL_XMIT(tstats, &dev->stats);
584         return NETDEV_TX_OK;
585
586 tx_error_icmp:
587         dst_link_failure(skb);
588 tx_error:
589         dev->stats.tx_errors++;
590         dev_kfree_skb(skb);
591         return NETDEV_TX_OK;
592 }
593
/*
 * ipip_tunnel_bind_dev - derive link parameters from the underlying device.
 *
 * Resolves the route to the tunnel's remote endpoint (if configured) to
 * find the lower device, falling back to the explicitly bound link, and
 * sizes hard_header_len/mtu to account for the outer IPv4 header.
 */
594 static void ipip_tunnel_bind_dev(struct net_device *dev)
595 {
596         struct net_device *tdev = NULL;
597         struct ip_tunnel *tunnel;
598         const struct iphdr *iph;
599
600         tunnel = netdev_priv(dev);
601         iph = &tunnel->parms.iph;
602
603         if (iph->daddr) {
604                 struct rtable *rt;
605                 struct flowi4 fl4;
606
607                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
608                                            iph->daddr, iph->saddr,
609                                            0, 0,
610                                            IPPROTO_IPIP,
611                                            RT_TOS(iph->tos),
612                                            tunnel->parms.link);
613                 if (!IS_ERR(rt)) {
614                         tdev = rt->dst.dev;
615                         ip_rt_put(rt);
616                 }
617                 dev->flags |= IFF_POINTOPOINT;
618         }
619
            /* no route found (or no remote): fall back to the bound link */
620         if (!tdev && tunnel->parms.link)
621                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
622
623         if (tdev) {
                    /* leave room for the outer IPv4 header on every packet */
624                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
625                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
626         }
627         dev->iflink = tunnel->parms.link;
628 }
629
/*
 * ipip_tunnel_ioctl - ndo_do_ioctl: legacy tunnel configuration interface.
 *
 * Handles SIOCGETTUNNEL (query parms), SIOCADDTUNNEL/SIOCCHGTUNNEL
 * (create or reconfigure, CAP_NET_ADMIN required) and SIOCDELTUNNEL
 * (destroy, CAP_NET_ADMIN required).  Parameters are exchanged with
 * userspace as a struct ip_tunnel_parm via ifr->ifr_ifru.ifru_data.
 * Called under RTNL.  Returns 0 or a negative errno.
 */
630 static int
631 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
632 {
633         int err = 0;
634         struct ip_tunnel_parm p;
635         struct ip_tunnel *t;
636         struct net *net = dev_net(dev);
637         struct ipip_net *ipn = net_generic(net, ipip_net_id);
638
639         switch (cmd) {
640         case SIOCGETTUNNEL:
641                 t = NULL;
                    /* on the fallback device, look up the tunnel the user named */
642                 if (dev == ipn->fb_tunnel_dev) {
643                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
644                                 err = -EFAULT;
645                                 break;
646                         }
647                         t = ipip_tunnel_locate(net, &p, 0);
648                 }
649                 if (t == NULL)
650                         t = netdev_priv(dev);
651                 memcpy(&p, &t->parms, sizeof(p));
652                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
653                         err = -EFAULT;
654                 break;
655
656         case SIOCADDTUNNEL:
657         case SIOCCHGTUNNEL:
658                 err = -EPERM;
659                 if (!capable(CAP_NET_ADMIN))
660                         goto done;
661
662                 err = -EFAULT;
663                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
664                         goto done;
665
                    /* outer header must be plain IPIP: v4, no options, DF only */
666                 err = -EINVAL;
667                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
668                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
669                         goto done;
                    /* a fixed TTL only makes sense with DF set (no fragmentation) */
670                 if (p.iph.ttl)
671                         p.iph.frag_off |= htons(IP_DF);
672
673                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
674
675                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
676                         if (t != NULL) {
                                    /* the new addresses already belong to another tunnel */
677                                 if (t->dev != dev) {
678                                         err = -EEXIST;
679                                         break;
680                                 }
681                         } else {
                                    /* cannot toggle point-to-point-ness of an existing dev */
682                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
683                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
684                                         err = -EINVAL;
685                                         break;
686                                 }
                                    /* re-hash this device under its new endpoints */
687                                 t = netdev_priv(dev);
688                                 ipip_tunnel_unlink(ipn, t);
689                                 synchronize_net();
690                                 t->parms.iph.saddr = p.iph.saddr;
691                                 t->parms.iph.daddr = p.iph.daddr;
692                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
693                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
694                                 ipip_tunnel_link(ipn, t);
695                                 netdev_state_change(dev);
696                         }
697                 }
698
699                 if (t) {
700                         err = 0;
701                         if (cmd == SIOCCHGTUNNEL) {
702                                 t->parms.iph.ttl = p.iph.ttl;
703                                 t->parms.iph.tos = p.iph.tos;
704                                 t->parms.iph.frag_off = p.iph.frag_off;
705                                 if (t->parms.link != p.link) {
706                                         t->parms.link = p.link;
707                                         ipip_tunnel_bind_dev(dev);
708                                         netdev_state_change(dev);
709                                 }
710                         }
                            /* report the effective parameters back to userspace */
711                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
712                                 err = -EFAULT;
713                 } else
714                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
715                 break;
716
717         case SIOCDELTUNNEL:
718                 err = -EPERM;
719                 if (!capable(CAP_NET_ADMIN))
720                         goto done;
721
722                 if (dev == ipn->fb_tunnel_dev) {
723                         err = -EFAULT;
724                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
725                                 goto done;
726                         err = -ENOENT;
727                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
728                                 goto done;
729                         err = -EPERM;
                            /* the fallback device itself cannot be deleted this way */
730                         if (t->dev == ipn->fb_tunnel_dev)
731                                 goto done;
732                         dev = t->dev;
733                 }
734                 unregister_netdevice(dev);
735                 err = 0;
736                 break;
737
738         default:
739                 err = -EINVAL;
740         }
741
742 done:
743         return err;
744 }
745
746 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
747 {
748         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
749                 return -EINVAL;
750         dev->mtu = new_mtu;
751         return 0;
752 }
753
/* Device operations for ipip tunnel netdevices. */
754 static const struct net_device_ops ipip_netdev_ops = {
755         .ndo_uninit     = ipip_tunnel_uninit,
756         .ndo_start_xmit = ipip_tunnel_xmit,
757         .ndo_do_ioctl   = ipip_tunnel_ioctl,
758         .ndo_change_mtu = ipip_tunnel_change_mtu,
759         .ndo_get_stats64 = ipip_get_stats64,
760 };
761
/* Destructor: release per-cpu stats, then the device itself. */
762 static void ipip_dev_free(struct net_device *dev)
763 {
764         free_percpu(dev->tstats);
765         free_netdev(dev);
766 }
767
/*
 * ipip_tunnel_setup - alloc_netdev() setup callback: static device
 * defaults for a tunnel device (ops, type, header/MTU sizing, flags).
 * Per-tunnel state is filled in later by ipip_tunnel_init().
 */
768 static void ipip_tunnel_setup(struct net_device *dev)
769 {
770         dev->netdev_ops         = &ipip_netdev_ops;
771         dev->destructor         = ipip_dev_free;
772
773         dev->type               = ARPHRD_TUNNEL;
774         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
775         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
776         dev->flags              = IFF_NOARP;
777         dev->iflink             = 0;
778         dev->addr_len           = 4;
779         dev->features           |= NETIF_F_NETNS_LOCAL;
780         dev->features           |= NETIF_F_LLTX;
781         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
782 }
783
/*
 * ipip_tunnel_init - per-device init: publish the tunnel endpoints as
 * the device's hardware/broadcast addresses, bind to the lower device
 * and allocate per-cpu stats.  Returns 0 or -ENOMEM.
 */
784 static int ipip_tunnel_init(struct net_device *dev)
785 {
786         struct ip_tunnel *tunnel = netdev_priv(dev);
787
788         tunnel->dev = dev;
789
790         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
791         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
792
793         ipip_tunnel_bind_dev(dev);
794
795         dev->tstats = alloc_percpu(struct pcpu_tstats);
796         if (!dev->tstats)
797                 return -ENOMEM;
798
799         return 0;
800 }
801
/*
 * ipip_fb_tunnel_init - init the per-netns fallback "tunl0" device.
 *
 * The fallback tunnel has no fixed endpoints; it is published in the
 * wildcard slot so ipip_rcv() can catch otherwise-unmatched IPIP
 * traffic.  Returns 0 or -ENOMEM.
 */
802 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
803 {
804         struct ip_tunnel *tunnel = netdev_priv(dev);
805         struct iphdr *iph = &tunnel->parms.iph;
806         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
807
808         tunnel->dev = dev;
809         strcpy(tunnel->parms.name, dev->name);
810
811         iph->version            = 4;
812         iph->protocol           = IPPROTO_IPIP;
813         iph->ihl                = 5;
814
815         dev->tstats = alloc_percpu(struct pcpu_tstats);
816         if (!dev->tstats)
817                 return -ENOMEM;
818
            /* reference dropped in ipip_tunnel_uninit() */
819         dev_hold(dev);
820         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
821         return 0;
822 }
823
/* IPPROTO_IPIP tunnel handler registered with the xfrm4 tunnel layer. */
824 static struct xfrm_tunnel ipip_handler __read_mostly = {
825         .handler        =       ipip_rcv,
826         .err_handler    =       ipip_err,
827         .priority       =       1,
828 };
829
830 static const char banner[] __initconst =
831         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
832
/*
 * ipip_destroy_tunnels - queue every hashed tunnel device in this netns
 * for unregistration.  Skips prio 0 (the wildcard/fallback slot, torn
 * down separately by the caller).  Called under RTNL.
 */
833 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
834 {
835         int prio;
836
837         for (prio = 1; prio < 4; prio++) {
838                 int h;
839                 for (h = 0; h < HASH_SIZE; h++) {
840                         struct ip_tunnel *t;
841
842                         t = rtnl_dereference(ipn->tunnels[prio][h]);
843                         while (t != NULL) {
844                                 unregister_netdevice_queue(t->dev, head);
845                                 t = rtnl_dereference(t->next);
846                         }
847                 }
848         }
849 }
850
/*
 * ipip_init_net - per-netns init: wire up the prio-indexed table array
 * and create/register the fallback "tunl0" device.
 * Returns 0 or a negative errno (allocation/registration failure).
 */
851 static int __net_init ipip_init_net(struct net *net)
852 {
853         struct ipip_net *ipn = net_generic(net, ipip_net_id);
854         struct ip_tunnel *t;
855         int err;
856
            /* index order must match the prio bits built in __ipip_bucket() */
857         ipn->tunnels[0] = ipn->tunnels_wc;
858         ipn->tunnels[1] = ipn->tunnels_l;
859         ipn->tunnels[2] = ipn->tunnels_r;
860         ipn->tunnels[3] = ipn->tunnels_r_l;
861
862         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
863                                            "tunl0",
864                                            ipip_tunnel_setup);
865         if (!ipn->fb_tunnel_dev) {
866                 err = -ENOMEM;
867                 goto err_alloc_dev;
868         }
869         dev_net_set(ipn->fb_tunnel_dev, net);
870
871         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
872         if (err)
873                 goto err_reg_dev;
874
875         if ((err = register_netdev(ipn->fb_tunnel_dev)))
876                 goto err_reg_dev;
877
878         t = netdev_priv(ipn->fb_tunnel_dev);
879
880         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
881         return 0;
882
883 err_reg_dev:
884         ipip_dev_free(ipn->fb_tunnel_dev);
885 err_alloc_dev:
886         /* nothing */
887         return err;
888 }
889
/*
 * ipip_exit_net - per-netns teardown: batch-unregister every tunnel
 * device (including the fallback) in one RTNL critical section.
 */
890 static void __net_exit ipip_exit_net(struct net *net)
891 {
892         struct ipip_net *ipn = net_generic(net, ipip_net_id);
893         LIST_HEAD(list);
894
895         rtnl_lock();
896         ipip_destroy_tunnels(ipn, &list);
897         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
898         unregister_netdevice_many(&list);
899         rtnl_unlock();
900 }
901
/* Per-network-namespace lifecycle hooks and private-data sizing. */
902 static struct pernet_operations ipip_net_ops = {
903         .init = ipip_init_net,
904         .exit = ipip_exit_net,
905         .id   = &ipip_net_id,
906         .size = sizeof(struct ipip_net),
907 };
908
/*
 * ipip_init - module entry: register per-netns state first, then claim
 * the IPPROTO_IPIP tunnel slot; unwind the pernet registration if the
 * tunnel slot is taken.  Returns 0 or a negative errno.
 */
909 static int __init ipip_init(void)
910 {
911         int err;
912
913         printk(banner);
914
915         err = register_pernet_device(&ipip_net_ops);
916         if (err < 0)
917                 return err;
918         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
919         if (err < 0) {
920                 unregister_pernet_device(&ipip_net_ops);
921                 pr_info("%s: can't register tunnel\n", __func__);
922         }
923         return err;
924 }
925
/*
 * ipip_fini - module exit: release the IPPROTO_IPIP tunnel slot, then
 * tear down all per-netns state (reverse of ipip_init()).
 */
926 static void __exit ipip_fini(void)
927 {
928         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
929                 pr_info("%s: can't deregister tunnel\n", __func__);
930
931         unregister_pernet_device(&ipip_net_ops);
932 }
933
934 module_init(ipip_init);
935 module_exit(ipip_fini);
936 MODULE_LICENSE("GPL");
937 MODULE_ALIAS_NETDEV("tunl0");