/*
 * net/ipv4/ipip.c — snapshot exported from the can-eth-gw-linux.git tree
 * (gitweb page for commit "ipip: add GSO support").
 */
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
#define HASH_SIZE  16
/* Fold an IPv4 address into a 4-bit bucket index (0..HASH_SIZE-1) */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

/* When set, received packets with a corrupted ECN field are logged
 * (rate-limited) in ipip_rcv(); tunable at runtime via sysfs. */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
/* Per-network-namespace id, assigned by the netns generic infrastructure */
static int ipip_net_id __read_mostly;

/* Per-netns state: four tunnel hash tables, keyed by which endpoint
 * addresses are configured, plus the fallback "tunl0" device. */
struct ipip_net {
	/* tunnels with both remote and local address set */
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	/* tunnels with only the remote address set */
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	/* tunnels with only the local address set */
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	/* single wildcard (no address) slot, used by the fallback device */
	struct ip_tunnel __rcu *tunnels_wc[1];
	/* index by "prio" (wc=0, l=1, r=2, r_l=3) — see __ipip_bucket() */
	struct ip_tunnel __rcu **tunnels[4];

	struct net_device *fb_tunnel_dev;
};
137
138 static int ipip_tunnel_init(struct net_device *dev);
139 static void ipip_tunnel_setup(struct net_device *dev);
140 static void ipip_dev_free(struct net_device *dev);
141
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/* Walk one hash chain under RCU; expects a local 'struct ip_tunnel *t' */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
148
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	/* makes the u64 counters readable atomically on 32-bit hosts */
	struct u64_stats_sync	syncp;
};
157
/*
 * ndo_get_stats64: sum the per-cpu rx/tx counters into @tot.
 * The packet/byte counters are read under the u64_stats seqcount so a
 * concurrent writer cannot tear a 64-bit value; the rarely-updated
 * error counters are copied straight from the shared netdev stats.
 */
static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
						  struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		/* retry the snapshot if a writer raced with us */
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;
	tot->collisions = dev->stats.collisions;

	return tot;
}
191
/*
 * Find the tunnel matching an outer (remote, local) address pair.
 * Lookup is most-specific first: remote+local, then remote-only, then
 * local-only, finally the single wildcard (fallback) tunnel.  Runs
 * under RCU; only tunnels whose device is administratively up match.
 */
static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
		__be32 remote, __be32 local)
{
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(local);
	struct ip_tunnel *t;
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	t = rcu_dereference(ipn->tunnels_wc[0]);
	if (t && (t->dev->flags&IFF_UP))
		return t;
	return NULL;
}
218
219 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
220                 struct ip_tunnel_parm *parms)
221 {
222         __be32 remote = parms->iph.daddr;
223         __be32 local = parms->iph.saddr;
224         unsigned int h = 0;
225         int prio = 0;
226
227         if (remote) {
228                 prio |= 2;
229                 h ^= HASH(remote);
230         }
231         if (local) {
232                 prio |= 1;
233                 h ^= HASH(local);
234         }
235         return &ipn->tunnels[prio][h];
236 }
237
/* Convenience wrapper: bucket for an existing tunnel's parameters */
static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
		struct ip_tunnel *t)
{
	return __ipip_bucket(ipn, &t->parms);
}
243
/*
 * Remove @t from its hash chain.  Caller holds RTNL (rtnl_dereference);
 * concurrent RCU readers keep seeing a consistent chain because only a
 * single next-pointer is rewritten.
 */
static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = ipip_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
258
/*
 * Insert @t at the head of its hash chain.  Caller holds RTNL; the
 * rcu_assign_pointer() on *tp publishes the fully-initialized tunnel
 * to RCU readers only after its next pointer is set.
 */
static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
266
/*
 * Find the tunnel whose endpoints exactly match @parms; if none exists
 * and @create is set, allocate, initialize and register a new tunnel
 * device and link it into the hash table.  Caller holds RTNL.
 * Returns the tunnel, or NULL on lookup miss (!create) or any failure.
 */
static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	for (tp = __ipip_bucket(ipn, parms);
		 (t = rtnl_dereference(*tp)) != NULL;
		 tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* use the requested name, or let the core pick tunl<N> */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "tunl%d");

	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;

	if (ipip_tunnel_init(dev) < 0)
		goto failed_free;

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* capture the final name in case "%d" was expanded */
	strcpy(nt->parms.name, dev->name);

	/* reference dropped again in ipip_tunnel_uninit() */
	dev_hold(dev);
	ipip_tunnel_link(ipn, nt);
	return nt;

failed_free:
	ipip_dev_free(dev);
	return NULL;
}
317
318 /* called with RTNL */
319 static void ipip_tunnel_uninit(struct net_device *dev)
320 {
321         struct net *net = dev_net(dev);
322         struct ipip_net *ipn = net_generic(net, ipip_net_id);
323
324         if (dev == ipn->fb_tunnel_dev)
325                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
326         else
327                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
328         dev_put(dev);
329 }
330
/*
 * ICMP error handler for the outer IPIP header: called when an ICMP
 * error quotes one of our encapsulated packets.  Updates the cached
 * PMTU or redirect state on the matching tunnel's route, and tracks an
 * error count so the transmit path can report link failures.  Returns
 * 0, or -ENOENT when no tunnel matches the quoted addresses.
 */
static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	/* skb->data points at the quoted outer IPv4 header */
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	case ICMP_REDIRECT:
		break;
	}

	err = -ENOENT;
	/* the quoted packet was ours, so its daddr is the tunnel remote */
	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
			      IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	/* NBMA tunnel: remote not fixed, nothing to record */
	if (t->parms.iph.daddr == 0)
		goto out;

	err = 0;
	/* TTL inherited from inner packet: TIME_EXCEEDED is expected */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* count bursts of errors within the timeout window */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:

	return err;
}
406
/*
 * Receive path: decapsulate an IPIP packet whose outer header matches a
 * configured tunnel.  Returns 0 when the packet was consumed (delivered
 * or dropped) and -1 when no tunnel matches, letting the xfrm4 tunnel
 * infrastructure pass it to the next handler.
 */
static int ipip_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto drop;

		secpath_reset(skb);

		/* strip the outer header: inner IP becomes the new network
		 * header, the old one is kept as the mac header */
		skb->mac_header = skb->network_header;
		skb_reset_network_header(skb);
		skb->protocol = htons(ETH_P_IP);
		skb->pkt_type = PACKET_HOST;

		__skb_tunnel_rx(skb, tunnel->dev);

		/* propagate ECN from outer to inner header; err > 1 means
		 * the combination is invalid and the packet must go */
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		netif_rx(skb);
		return 0;
	}

	return -1;

drop:
	kfree_skb(skb);
	return 0;
}
457
458 /*
459  *      This function assumes it is being called from dev_queue_xmit()
460  *      and that skb is filled properly by that function.
461  */
462
/*
 * Transmit path: route the inner IPv4 packet to the tunnel endpoint,
 * handle DF/PMTU, make headroom for the outer header, then prepend and
 * fill in the new IPIP header and send it out via the routed device.
 * Always returns NETDEV_TX_OK; on failure the skb is accounted and
 * freed here.
 */
static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct pcpu_tstats *tstats;
	const struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	__be16 df = tiph->frag_off;
	struct rtable *rt;                      /* Route to the other host */
	struct net_device *tdev;                /* Device to other host */
	const struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *iph;                     /* Our new IP header */
	unsigned int max_headroom;              /* The extra header space needed */
	__be32 dst = tiph->daddr;
	struct flowi4 fl4;
	int    mtu;

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	/* resolve any pending partial checksum before encapsulating */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    skb_checksum_help(skb))
		goto tx_error;

	/* low bit of the configured TOS means "inherit from inner header" */
	if (tos & 1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = skb_rtable(skb)) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}
		dst = rt_nexthop(rt, old_iph->daddr);
	}

	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
				   dst, tiph->saddr,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(tos),
				   tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->dst.dev;

	/* routing back to ourselves would recurse */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	df |= old_iph->frag_off & htons(IP_DF);

	if (df) {
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

		/* 68 is the minimum IPv4 MTU (RFC 791) */
		if (mtu < 68) {
			dev->stats.collisions++;
			ip_rt_put(rt);
			goto tx_error;
		}

		if (skb_dst(skb))
			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

		/* DF set and packet too big: bounce ICMP to the sender */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				  htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}

	/* replay recent ICMP errors back to local senders, once each */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		/* header pointers moved with the reallocated skb */
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	fl4.daddr;
	iph->saddr		=	fl4.saddr;

	/* TTL 0 in the tunnel config means "inherit from inner header" */
	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	nf_reset(skb);
	tstats = this_cpu_ptr(dev->tstats);
	__IPTUNNEL_XMIT(tstats, &dev->stats);
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
605
/*
 * Bind the tunnel to its underlying device: resolve the route to the
 * remote endpoint (or fall back to the explicitly configured link) and
 * derive hard_header_len, MTU and iflink from that lower device.
 */
static void ipip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;

		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
					   iph->daddr, iph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(iph->tos),
					   tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		/* leave room for the extra outer IPv4 header */
		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
		dev->mtu = tdev->mtu - sizeof(struct iphdr);
	}
	dev->iflink = tunnel->parms.link;
}
641
/*
 * ndo_do_ioctl: legacy tunnel configuration interface.
 *	SIOCGETTUNNEL — copy out this tunnel's parameters (on the
 *	fallback device, look up the tunnel named in the request first).
 *	SIOCADD/CHGTUNNEL — create a tunnel or update an existing one;
 *	changing the endpoints of a regular device relinks it in the
 *	hash table.  Requires CAP_NET_ADMIN.
 *	SIOCDELTUNNEL — unregister a tunnel; the fallback device itself
 *	cannot be deleted this way.  Requires CAP_NET_ADMIN.
 * Called with RTNL held.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipip_net *ipn = net_generic(net, ipip_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ipn->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipip_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* only well-formed IPIP parameters are accepted */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
			goto done;
		/* a fixed TTL implies DF so PMTU discovery keeps working */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* keep point-to-point-ness consistent with
				 * whether a remote address is configured */
				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
					err = -EINVAL;
					break;
				}
				t = netdev_priv(dev);
				/* unlink, wait out RCU readers, rekey, relink */
				ipip_tunnel_unlink(ipn, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipip_tunnel_link(ipn, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					ipip_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ipn->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* never delete the fallback device itself */
			if (t->dev == ipn->fb_tunnel_dev)
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
757
758 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
759 {
760         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
761                 return -EINVAL;
762         dev->mtu = new_mtu;
763         return 0;
764 }
765
/* Netdev callbacks shared by all ipip tunnel devices */
static const struct net_device_ops ipip_netdev_ops = {
	.ndo_uninit	= ipip_tunnel_uninit,
	.ndo_start_xmit	= ipip_tunnel_xmit,
	.ndo_do_ioctl	= ipip_tunnel_ioctl,
	.ndo_change_mtu	= ipip_tunnel_change_mtu,
	.ndo_get_stats64 = ipip_get_stats64,
};
773
/* Device destructor: release the per-cpu stats before the netdev */
static void ipip_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
779
/* Offload features an ipip device can advertise to the stack */
#define IPIP_FEATURES (NETIF_F_SG |		\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_HW_CSUM)
784
/*
 * alloc_netdev() setup callback: establish the defaults common to
 * every ipip device (ops, tunnel link type, header/MTU sizing for one
 * extra IPv4 header, no ARP, netns-local, lockless transmit).
 */
static void ipip_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipip_netdev_ops;
	dev->destructor		= ipip_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	/* dev_addr/broadcast hold the 4-byte tunnel endpoint addresses */
	dev->addr_len		= 4;
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	/* keep the skb's dst: the xmit path may still need it */
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;

	dev->features		|= IPIP_FEATURES;
	dev->hw_features	|= IPIP_FEATURES;
}
803
/*
 * Per-device init for regular (non-fallback) tunnels: seed the device
 * addresses from the tunnel endpoints, bind to the underlying device
 * and allocate per-cpu stats.  Returns 0 or -ENOMEM.
 */
static int ipip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	ipip_tunnel_bind_dev(dev);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
821
/*
 * Init for the per-netns fallback "tunl0" device: it carries no fixed
 * endpoints, so it only fills in the invariant outer-header fields and
 * installs itself into the wildcard hash slot.  Returns 0 or -ENOMEM.
 */
static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	/* reference dropped in ipip_tunnel_uninit() */
	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}
843
/* xfrm hook that receives IPPROTO_IPIP packets and their ICMP errors.
 * Priority 1 — NOTE(review): presumably so xfrm/IPsec tunnel handlers
 * can register ahead of this plain decapsulator; confirm against
 * xfrm4_tunnel_register() ordering. */
static struct xfrm_tunnel ipip_handler __read_mostly = {
	.handler	=	ipip_rcv,
	.err_handler	=	ipip_err,
	.priority	=	1,
};
849
/* Loaded-module banner, emitted once from ipip_init() via printk();
 * the KERN_INFO level is embedded in the string (pre-pr_info style). */
static const char banner[] __initconst =
	KERN_INFO "IPv4 over IPv4 tunneling driver\n";
852
853 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
854 {
855         int prio;
856
857         for (prio = 1; prio < 4; prio++) {
858                 int h;
859                 for (h = 0; h < HASH_SIZE; h++) {
860                         struct ip_tunnel *t;
861
862                         t = rtnl_dereference(ipn->tunnels[prio][h]);
863                         while (t != NULL) {
864                                 unregister_netdevice_queue(t->dev, head);
865                                 t = rtnl_dereference(t->next);
866                         }
867                 }
868         }
869 }
870
871 static int __net_init ipip_init_net(struct net *net)
872 {
873         struct ipip_net *ipn = net_generic(net, ipip_net_id);
874         struct ip_tunnel *t;
875         int err;
876
877         ipn->tunnels[0] = ipn->tunnels_wc;
878         ipn->tunnels[1] = ipn->tunnels_l;
879         ipn->tunnels[2] = ipn->tunnels_r;
880         ipn->tunnels[3] = ipn->tunnels_r_l;
881
882         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
883                                            "tunl0",
884                                            ipip_tunnel_setup);
885         if (!ipn->fb_tunnel_dev) {
886                 err = -ENOMEM;
887                 goto err_alloc_dev;
888         }
889         dev_net_set(ipn->fb_tunnel_dev, net);
890
891         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
892         if (err)
893                 goto err_reg_dev;
894
895         if ((err = register_netdev(ipn->fb_tunnel_dev)))
896                 goto err_reg_dev;
897
898         t = netdev_priv(ipn->fb_tunnel_dev);
899
900         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
901         return 0;
902
903 err_reg_dev:
904         ipip_dev_free(ipn->fb_tunnel_dev);
905 err_alloc_dev:
906         /* nothing */
907         return err;
908 }
909
/* Per-namespace teardown: under RTNL, queue all tunnels plus the
 * fallback device on one list and unregister them in a single batch
 * (one RCU grace period instead of one per device). */
static void __net_exit ipip_exit_net(struct net *net)
{
	struct ipip_net *ipn = net_generic(net, ipip_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	ipip_destroy_tunnels(ipn, &list);
	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
921
/* Per-network-namespace state: .size bytes of struct ipip_net are
 * allocated per netns and found again via net_generic(ipip_net_id). */
static struct pernet_operations ipip_net_ops = {
	.init = ipip_init_net,
	.exit = ipip_exit_net,
	.id   = &ipip_net_id,
	.size = sizeof(struct ipip_net),
};
928
929 static int __init ipip_init(void)
930 {
931         int err;
932
933         printk(banner);
934
935         err = register_pernet_device(&ipip_net_ops);
936         if (err < 0)
937                 return err;
938         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
939         if (err < 0) {
940                 unregister_pernet_device(&ipip_net_ops);
941                 pr_info("%s: can't register tunnel\n", __func__);
942         }
943         return err;
944 }
945
946 static void __exit ipip_fini(void)
947 {
948         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
949                 pr_info("%s: can't deregister tunnel\n", __func__);
950
951         unregister_pernet_device(&ipip_net_ops);
952 }
953
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
/* Autoload this module when the "tunl0" device is requested. */
MODULE_ALIAS_NETDEV("tunl0");