]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blob - net/ipv4/ipip.c
Merge branch 'akpm' (Andrew's patch-bomb)
[can-eth-gw-linux.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, it is now a truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/*
 * Each tunnel hash table has HASH_SIZE buckets.  HASH() folds an IPv4
 * address into a bucket index in [0, HASH_SIZE) by XOR-ing the value with
 * itself shifted right by four bits and masking the result.
 *
 * HASH_SIZE must remain a power of two because the index is obtained by
 * masking with (HASH_SIZE - 1).  The macro argument is fully parenthesized
 * so that compound expressions (e.g. "a | b") hash as a single value; note
 * that the argument is still evaluated twice.
 */
#define HASH_SIZE  16
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
122
/*
 * Module parameter (registered with mode 0644).  ipip_rcv() consults it:
 * when true, a rate-limited info message is emitted for every packet whose
 * ECN decapsulation reports an error.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
/* Key passed to net_generic() to fetch this module's per-namespace state. */
static int ipip_net_id __read_mostly;

/*
 * Per network namespace tunnel tables, as used by ipip_tunnel_lookup() and
 * __ipip_bucket().
 */
struct ipip_net {
        /* searched first: tunnels matched on both remote and local address */
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
        /* searched second: tunnels matched on the remote address only */
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
        /* searched third: tunnels matched on the local address only */
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
        /* last resort: single wildcard slot (the fallback tunnel is stored here) */
        struct ip_tunnel __rcu *tunnels_wc[1];
        /*
         * Table selector indexed by the "prio" value computed in
         * __ipip_bucket() (bit 1: remote set, bit 0: local set).
         * NOTE(review): presumably wired to the four arrays above by the
         * per-net init code, which is not visible here -- confirm.
         */
        struct ip_tunnel __rcu **tunnels[4];

        /* device whose ioctls may address other tunnels; see ipip_tunnel_ioctl() */
        struct net_device *fb_tunnel_dev;
};
137
/* Forward declarations: these are referenced before their definitions below. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
142
143 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144                                                   struct rtnl_link_stats64 *tot)
145 {
146         int i;
147
148         for_each_possible_cpu(i) {
149                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151                 unsigned int start;
152
153                 do {
154                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
155                         rx_packets = tstats->rx_packets;
156                         tx_packets = tstats->tx_packets;
157                         rx_bytes = tstats->rx_bytes;
158                         tx_bytes = tstats->tx_bytes;
159                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161                 tot->rx_packets += rx_packets;
162                 tot->tx_packets += tx_packets;
163                 tot->rx_bytes   += rx_bytes;
164                 tot->tx_bytes   += tx_bytes;
165         }
166
167         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169         tot->tx_dropped = dev->stats.tx_dropped;
170         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171         tot->tx_errors = dev->stats.tx_errors;
172         tot->collisions = dev->stats.collisions;
173
174         return tot;
175 }
176
177 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178                 __be32 remote, __be32 local)
179 {
180         unsigned int h0 = HASH(remote);
181         unsigned int h1 = HASH(local);
182         struct ip_tunnel *t;
183         struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185         for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186                 if (local == t->parms.iph.saddr &&
187                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196                         return t;
197
198         t = rcu_dereference(ipn->tunnels_wc[0]);
199         if (t && (t->dev->flags&IFF_UP))
200                 return t;
201         return NULL;
202 }
203
204 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205                 struct ip_tunnel_parm *parms)
206 {
207         __be32 remote = parms->iph.daddr;
208         __be32 local = parms->iph.saddr;
209         unsigned int h = 0;
210         int prio = 0;
211
212         if (remote) {
213                 prio |= 2;
214                 h ^= HASH(remote);
215         }
216         if (local) {
217                 prio |= 1;
218                 h ^= HASH(local);
219         }
220         return &ipn->tunnels[prio][h];
221 }
222
223 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224                 struct ip_tunnel *t)
225 {
226         return __ipip_bucket(ipn, &t->parms);
227 }
228
229 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230 {
231         struct ip_tunnel __rcu **tp;
232         struct ip_tunnel *iter;
233
234         for (tp = ipip_bucket(ipn, t);
235              (iter = rtnl_dereference(*tp)) != NULL;
236              tp = &iter->next) {
237                 if (t == iter) {
238                         rcu_assign_pointer(*tp, t->next);
239                         break;
240                 }
241         }
242 }
243
244 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 {
246         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249         rcu_assign_pointer(*tp, t);
250 }
251
252 static int ipip_tunnel_create(struct net_device *dev)
253 {
254         struct ip_tunnel *t = netdev_priv(dev);
255         struct net *net = dev_net(dev);
256         struct ipip_net *ipn = net_generic(net, ipip_net_id);
257         int err;
258
259         err = ipip_tunnel_init(dev);
260         if (err < 0)
261                 goto out;
262
263         err = register_netdevice(dev);
264         if (err < 0)
265                 goto out;
266
267         strcpy(t->parms.name, dev->name);
268         dev->rtnl_link_ops = &ipip_link_ops;
269
270         dev_hold(dev);
271         ipip_tunnel_link(ipn, t);
272         return 0;
273
274 out:
275         return err;
276 }
277
278 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279                 struct ip_tunnel_parm *parms, int create)
280 {
281         __be32 remote = parms->iph.daddr;
282         __be32 local = parms->iph.saddr;
283         struct ip_tunnel *t, *nt;
284         struct ip_tunnel __rcu **tp;
285         struct net_device *dev;
286         char name[IFNAMSIZ];
287         struct ipip_net *ipn = net_generic(net, ipip_net_id);
288
289         for (tp = __ipip_bucket(ipn, parms);
290                  (t = rtnl_dereference(*tp)) != NULL;
291                  tp = &t->next) {
292                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293                         return t;
294         }
295         if (!create)
296                 return NULL;
297
298         if (parms->name[0])
299                 strlcpy(name, parms->name, IFNAMSIZ);
300         else
301                 strcpy(name, "tunl%d");
302
303         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304         if (dev == NULL)
305                 return NULL;
306
307         dev_net_set(dev, net);
308
309         nt = netdev_priv(dev);
310         nt->parms = *parms;
311
312         if (ipip_tunnel_create(dev) < 0)
313                 goto failed_free;
314
315         return nt;
316
317 failed_free:
318         ipip_dev_free(dev);
319         return NULL;
320 }
321
322 /* called with RTNL */
323 static void ipip_tunnel_uninit(struct net_device *dev)
324 {
325         struct net *net = dev_net(dev);
326         struct ipip_net *ipn = net_generic(net, ipip_net_id);
327
328         if (dev == ipn->fb_tunnel_dev)
329                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330         else
331                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
332         dev_put(dev);
333 }
334
335 static int ipip_err(struct sk_buff *skb, u32 info)
336 {
337
338 /* All the routers (except for Linux) return only
339    8 bytes of packet payload. It means, that precise relaying of
340    ICMP in the real Internet is absolutely infeasible.
341  */
342         const struct iphdr *iph = (const struct iphdr *)skb->data;
343         const int type = icmp_hdr(skb)->type;
344         const int code = icmp_hdr(skb)->code;
345         struct ip_tunnel *t;
346         int err;
347
348         switch (type) {
349         default:
350         case ICMP_PARAMETERPROB:
351                 return 0;
352
353         case ICMP_DEST_UNREACH:
354                 switch (code) {
355                 case ICMP_SR_FAILED:
356                 case ICMP_PORT_UNREACH:
357                         /* Impossible event. */
358                         return 0;
359                 default:
360                         /* All others are translated to HOST_UNREACH.
361                            rfc2003 contains "deep thoughts" about NET_UNREACH,
362                            I believe they are just ether pollution. --ANK
363                          */
364                         break;
365                 }
366                 break;
367         case ICMP_TIME_EXCEEDED:
368                 if (code != ICMP_EXC_TTL)
369                         return 0;
370                 break;
371         case ICMP_REDIRECT:
372                 break;
373         }
374
375         err = -ENOENT;
376         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
377         if (t == NULL)
378                 goto out;
379
380         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
381                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
382                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
383                 err = 0;
384                 goto out;
385         }
386
387         if (type == ICMP_REDIRECT) {
388                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
389                               IPPROTO_IPIP, 0);
390                 err = 0;
391                 goto out;
392         }
393
394         if (t->parms.iph.daddr == 0)
395                 goto out;
396
397         err = 0;
398         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
399                 goto out;
400
401         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
402                 t->err_count++;
403         else
404                 t->err_count = 1;
405         t->err_time = jiffies;
406 out:
407
408         return err;
409 }
410
411 static int ipip_rcv(struct sk_buff *skb)
412 {
413         struct ip_tunnel *tunnel;
414         const struct iphdr *iph = ip_hdr(skb);
415         int err;
416
417         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418         if (tunnel != NULL) {
419                 struct pcpu_tstats *tstats;
420
421                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
422                         goto drop;
423
424                 secpath_reset(skb);
425
426                 skb->mac_header = skb->network_header;
427                 skb_reset_network_header(skb);
428                 skb->protocol = htons(ETH_P_IP);
429                 skb->pkt_type = PACKET_HOST;
430
431                 __skb_tunnel_rx(skb, tunnel->dev);
432
433                 err = IP_ECN_decapsulate(iph, skb);
434                 if (unlikely(err)) {
435                         if (log_ecn_error)
436                                 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
437                                                      &iph->saddr, iph->tos);
438                         if (err > 1) {
439                                 ++tunnel->dev->stats.rx_frame_errors;
440                                 ++tunnel->dev->stats.rx_errors;
441                                 goto drop;
442                         }
443                 }
444
445                 tstats = this_cpu_ptr(tunnel->dev->tstats);
446                 u64_stats_update_begin(&tstats->syncp);
447                 tstats->rx_packets++;
448                 tstats->rx_bytes += skb->len;
449                 u64_stats_update_end(&tstats->syncp);
450
451                 netif_rx(skb);
452                 return 0;
453         }
454
455         return -1;
456
457 drop:
458         kfree_skb(skb);
459         return 0;
460 }
461
/*
 *      Transmit handler (ndo_start_xmit) of the tunnel device.
 *
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      The IPv4 packet in skb gets an outer IPv4 header with protocol
 *      IPPROTO_IPIP prepended and is passed to iptunnel_xmit().
 *
 *      Always returns NETDEV_TX_OK.  On every failure path the skb is
 *      freed here and a device error counter is incremented (tx_errors,
 *      or tx_dropped when the headroom reallocation fails).
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr  *tiph = &tunnel->parms.iph;  /* configured outer header template */
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        const struct iphdr  *old_iph = ip_hdr(skb);     /* inner (original) header */
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;
        int    mtu;

        /* Only IPv4 payloads can be carried. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* Resolve a pending partial checksum before encapsulating. */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            skb_checksum_help(skb))
                goto tx_error;

        /* Low bit set in the configured TOS: take the TOS of the inner packet. */
        if (tos & 1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel: no configured remote, use the packet's next hop */
                if ((rt = skb_rtable(skb)) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }
                dst = rt_nexthop(rt, old_iph->daddr);
        }

        /* Route the outer packet; fl4 is passed in and read back further down. */
        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
                                   dst, tiph->saddr,
                                   0, 0,
                                   IPPROTO_IPIP, RT_TOS(tos),
                                   tunnel->parms.link);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        /* The route points back at this tunnel device: refuse to loop. */
        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        /* Outer DF is the configured DF bit or the inner packet's DF bit. */
        df |= old_iph->frag_off & htons(IP_DF);

        if (df) {
                /* MTU left for the inner packet once the outer header is added */
                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

                if (mtu < 68) {
                        dev->stats.collisions++;
                        ip_rt_put(rt);
                        goto tx_error;
                }

                if (skb_dst(skb))
                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

                /* Inner packet has DF set and does not fit: report FRAG_NEEDED. */
                if ((old_iph->frag_off & htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }

        /*
         * Errors recorded by ipip_err(): while still inside the timeout
         * window, consume one per transmitted packet and signal a link
         * failure for it; once the window has passed, forget them.
         */
        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* skb was replaced: re-read the inner header from the new buffer */
                old_iph = ip_hdr(skb);
        }

        /* The inner IP header becomes the transport payload of the outer one. */
        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        /* Replace the skb's route with the outer route looked up above. */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       fl4.daddr;
        iph->saddr              =       fl4.saddr;

        /* A configured TTL of 0 means: inherit the TTL of the inner packet. */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        iptunnel_xmit(skb, dev);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
606
607 static void ipip_tunnel_bind_dev(struct net_device *dev)
608 {
609         struct net_device *tdev = NULL;
610         struct ip_tunnel *tunnel;
611         const struct iphdr *iph;
612
613         tunnel = netdev_priv(dev);
614         iph = &tunnel->parms.iph;
615
616         if (iph->daddr) {
617                 struct rtable *rt;
618                 struct flowi4 fl4;
619
620                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
621                                            iph->daddr, iph->saddr,
622                                            0, 0,
623                                            IPPROTO_IPIP,
624                                            RT_TOS(iph->tos),
625                                            tunnel->parms.link);
626                 if (!IS_ERR(rt)) {
627                         tdev = rt->dst.dev;
628                         ip_rt_put(rt);
629                 }
630                 dev->flags |= IFF_POINTOPOINT;
631         }
632
633         if (!tdev && tunnel->parms.link)
634                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
635
636         if (tdev) {
637                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
638                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
639         }
640         dev->iflink = tunnel->parms.link;
641 }
642
/*
 * Apply the parameters in @p (addresses, ttl, tos, frag_off, link) to the
 * existing tunnel @t and notify listeners of the state change.
 */
static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
{
        struct net *net = dev_net(t->dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        /*
         * The hash bucket depends on the addresses, so the tunnel is taken
         * off its chain before they change and re-linked afterwards.
         * NOTE(review): synchronize_net() presumably lets lookups still
         * walking the old chain finish before the addresses are rewritten
         * -- confirm.
         */
        ipip_tunnel_unlink(ipn, t);
        synchronize_net();
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        /* the device's hardware and broadcast addresses mirror local and remote */
        memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
        memcpy(t->dev->broadcast, &p->iph.daddr, 4);
        ipip_tunnel_link(ipn, t);
        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;
        /* a changed underlying link requires re-deriving mtu and header length */
        if (t->parms.link != p->link) {
                t->parms.link = p->link;
                ipip_tunnel_bind_dev(t->dev);
        }
        netdev_state_change(t->dev);
}
664
/*
 * ndo_do_ioctl handler: get, add, change and delete tunnels.
 *
 * The user buffer at ifr->ifr_ifru.ifru_data holds a struct ip_tunnel_parm.
 * Issued on the fallback device, the commands address the tunnel described
 * by that structure; issued on any other tunnel device, GET, CHG and DEL
 * operate on that device itself.
 *
 * Returns 0 on success, otherwise:
 *   -EPERM   caller lacks CAP_NET_ADMIN, or tried to delete the fallback device
 *   -EFAULT  the user buffer could not be read or written
 *   -EINVAL  invalid header template, point-to-point mismatch, unknown cmd
 *   -EEXIST  SIOCCHGTUNNEL would collide with another device's tunnel
 *   -ENOBUFS SIOCADDTUNNEL could not create the tunnel
 *   -ENOENT  the requested tunnel does not exist
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                /* on the fallback device, look up the tunnel the caller described */
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(net, &p, 0);
                }
                /* otherwise, or if nothing matched, report this device's own tunnel */
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                /*
                 * NOTE(review): p, including p.name, is taken verbatim from
                 * userspace; nothing here checks that p.name is
                 * NUL-terminated before it is handed to ipip_tunnel_locate().
                 */
                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* only a plain 20-byte IPv4/IPIP header template is accepted;
                 * no frag_off bits other than DF may be set */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* a fixed TTL forces the DF bit */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                /* creates the tunnel only for SIOCADDTUNNEL */
                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* the requested endpoints belong to another device */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* the point-to-point flag must agree with whether
                                 * a remote address is given */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                        }

                        ipip_tunnel_update(t, &p);
                }

                /* return the resulting parameters to the caller */
                if (t) {
                        err = 0;
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                /* on the fallback device, delete the tunnel the caller described;
                 * the fallback device itself cannot be deleted */
                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
764
765 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
766 {
767         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
768                 return -EINVAL;
769         dev->mtu = new_mtu;
770         return 0;
771 }
772
/* Device operations installed on every IPIP tunnel device by ipip_tunnel_setup(). */
static const struct net_device_ops ipip_netdev_ops = {
        .ndo_uninit     = ipip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
        .ndo_do_ioctl   = ipip_tunnel_ioctl,
        .ndo_change_mtu = ipip_tunnel_change_mtu,
        .ndo_get_stats64 = ipip_get_stats64,
};
780
/*
 * Release a tunnel device: free its per-CPU statistics, then the device
 * itself.  Installed as dev->destructor by ipip_tunnel_setup() and also
 * called directly by ipip_tunnel_locate() when tunnel creation fails.
 */
static void ipip_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);
}
786
/* Feature bits that ipip_tunnel_setup() sets in both dev->features and dev->hw_features. */
#define IPIP_FEATURES (NETIF_F_SG |             \
                       NETIF_F_FRAGLIST |       \
                       NETIF_F_HIGHDMA |        \
                       NETIF_F_HW_CSUM)
791
792 static void ipip_tunnel_setup(struct net_device *dev)
793 {
794         dev->netdev_ops         = &ipip_netdev_ops;
795         dev->destructor         = ipip_dev_free;
796
797         dev->type               = ARPHRD_TUNNEL;
798         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
799         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
800         dev->flags              = IFF_NOARP;
801         dev->iflink             = 0;
802         dev->addr_len           = 4;
803         dev->features           |= NETIF_F_NETNS_LOCAL;
804         dev->features           |= NETIF_F_LLTX;
805         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
806
807         dev->features           |= IPIP_FEATURES;
808         dev->hw_features        |= IPIP_FEATURES;
809 }
810
811 static int ipip_tunnel_init(struct net_device *dev)
812 {
813         struct ip_tunnel *tunnel = netdev_priv(dev);
814
815         tunnel->dev = dev;
816
817         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
818         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
819
820         ipip_tunnel_bind_dev(dev);
821
822         dev->tstats = alloc_percpu(struct pcpu_tstats);
823         if (!dev->tstats)
824                 return -ENOMEM;
825
826         return 0;
827 }
828
829 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
830 {
831         struct ip_tunnel *tunnel = netdev_priv(dev);
832         struct iphdr *iph = &tunnel->parms.iph;
833         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
834
835         tunnel->dev = dev;
836         strcpy(tunnel->parms.name, dev->name);
837
838         iph->version            = 4;
839         iph->protocol           = IPPROTO_IPIP;
840         iph->ihl                = 5;
841
842         dev->tstats = alloc_percpu(struct pcpu_tstats);
843         if (!dev->tstats)
844                 return -ENOMEM;
845
846         dev_hold(dev);
847         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
848         return 0;
849 }
850
851 static void ipip_netlink_parms(struct nlattr *data[],
852                                struct ip_tunnel_parm *parms)
853 {
854         memset(parms, 0, sizeof(*parms));
855
856         parms->iph.version = 4;
857         parms->iph.protocol = IPPROTO_IPIP;
858         parms->iph.ihl = 5;
859
860         if (!data)
861                 return;
862
863         if (data[IFLA_IPTUN_LINK])
864                 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
865
866         if (data[IFLA_IPTUN_LOCAL])
867                 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
868
869         if (data[IFLA_IPTUN_REMOTE])
870                 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
871
872         if (data[IFLA_IPTUN_TTL]) {
873                 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
874                 if (parms->iph.ttl)
875                         parms->iph.frag_off = htons(IP_DF);
876         }
877
878         if (data[IFLA_IPTUN_TOS])
879                 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
880
881         if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
882                 parms->iph.frag_off = htons(IP_DF);
883 }
884
885 static int ipip_newlink(struct net *src_net, struct net_device *dev,
886                         struct nlattr *tb[], struct nlattr *data[])
887 {
888         struct net *net = dev_net(dev);
889         struct ip_tunnel *nt;
890
891         nt = netdev_priv(dev);
892         ipip_netlink_parms(data, &nt->parms);
893
894         if (ipip_tunnel_locate(net, &nt->parms, 0))
895                 return -EEXIST;
896
897         return ipip_tunnel_create(dev);
898 }
899
900 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
901                            struct nlattr *data[])
902 {
903         struct ip_tunnel *t;
904         struct ip_tunnel_parm p;
905         struct net *net = dev_net(dev);
906         struct ipip_net *ipn = net_generic(net, ipip_net_id);
907
908         if (dev == ipn->fb_tunnel_dev)
909                 return -EINVAL;
910
911         ipip_netlink_parms(data, &p);
912
913         if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
914             (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
915                 return -EINVAL;
916
917         t = ipip_tunnel_locate(net, &p, 0);
918
919         if (t) {
920                 if (t->dev != dev)
921                         return -EEXIST;
922         } else
923                 t = netdev_priv(dev);
924
925         ipip_tunnel_update(t, &p);
926         return 0;
927 }
928
929 static size_t ipip_get_size(const struct net_device *dev)
930 {
931         return
932                 /* IFLA_IPTUN_LINK */
933                 nla_total_size(4) +
934                 /* IFLA_IPTUN_LOCAL */
935                 nla_total_size(4) +
936                 /* IFLA_IPTUN_REMOTE */
937                 nla_total_size(4) +
938                 /* IFLA_IPTUN_TTL */
939                 nla_total_size(1) +
940                 /* IFLA_IPTUN_TOS */
941                 nla_total_size(1) +
942                 /* IFLA_IPTUN_PMTUDISC */
943                 nla_total_size(1) +
944                 0;
945 }
946
947 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
948 {
949         struct ip_tunnel *tunnel = netdev_priv(dev);
950         struct ip_tunnel_parm *parm = &tunnel->parms;
951
952         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
953             nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
954             nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
955             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
956             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
957             nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
958                        !!(parm->iph.frag_off & htons(IP_DF))))
959                 goto nla_put_failure;
960         return 0;
961
962 nla_put_failure:
963         return -EMSGSIZE;
964 }
965
966 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
967         [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
968         [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
969         [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
970         [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
971         [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
972         [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
973 };
974
975 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
976         .kind           = "ipip",
977         .maxtype        = IFLA_IPTUN_MAX,
978         .policy         = ipip_policy,
979         .priv_size      = sizeof(struct ip_tunnel),
980         .setup          = ipip_tunnel_setup,
981         .newlink        = ipip_newlink,
982         .changelink     = ipip_changelink,
983         .get_size       = ipip_get_size,
984         .fill_info      = ipip_fill_info,
985 };
986
987 static struct xfrm_tunnel ipip_handler __read_mostly = {
988         .handler        =       ipip_rcv,
989         .err_handler    =       ipip_err,
990         .priority       =       1,
991 };
992
993 static const char banner[] __initconst =
994         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
995
996 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
997 {
998         int prio;
999
1000         for (prio = 1; prio < 4; prio++) {
1001                 int h;
1002                 for (h = 0; h < HASH_SIZE; h++) {
1003                         struct ip_tunnel *t;
1004
1005                         t = rtnl_dereference(ipn->tunnels[prio][h]);
1006                         while (t != NULL) {
1007                                 unregister_netdevice_queue(t->dev, head);
1008                                 t = rtnl_dereference(t->next);
1009                         }
1010                 }
1011         }
1012 }
1013
1014 static int __net_init ipip_init_net(struct net *net)
1015 {
1016         struct ipip_net *ipn = net_generic(net, ipip_net_id);
1017         struct ip_tunnel *t;
1018         int err;
1019
1020         ipn->tunnels[0] = ipn->tunnels_wc;
1021         ipn->tunnels[1] = ipn->tunnels_l;
1022         ipn->tunnels[2] = ipn->tunnels_r;
1023         ipn->tunnels[3] = ipn->tunnels_r_l;
1024
1025         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1026                                            "tunl0",
1027                                            ipip_tunnel_setup);
1028         if (!ipn->fb_tunnel_dev) {
1029                 err = -ENOMEM;
1030                 goto err_alloc_dev;
1031         }
1032         dev_net_set(ipn->fb_tunnel_dev, net);
1033
1034         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1035         if (err)
1036                 goto err_reg_dev;
1037
1038         if ((err = register_netdev(ipn->fb_tunnel_dev)))
1039                 goto err_reg_dev;
1040
1041         t = netdev_priv(ipn->fb_tunnel_dev);
1042
1043         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1044         return 0;
1045
1046 err_reg_dev:
1047         ipip_dev_free(ipn->fb_tunnel_dev);
1048 err_alloc_dev:
1049         /* nothing */
1050         return err;
1051 }
1052
1053 static void __net_exit ipip_exit_net(struct net *net)
1054 {
1055         struct ipip_net *ipn = net_generic(net, ipip_net_id);
1056         LIST_HEAD(list);
1057
1058         rtnl_lock();
1059         ipip_destroy_tunnels(ipn, &list);
1060         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
1061         unregister_netdevice_many(&list);
1062         rtnl_unlock();
1063 }
1064
1065 static struct pernet_operations ipip_net_ops = {
1066         .init = ipip_init_net,
1067         .exit = ipip_exit_net,
1068         .id   = &ipip_net_id,
1069         .size = sizeof(struct ipip_net),
1070 };
1071
1072 static int __init ipip_init(void)
1073 {
1074         int err;
1075
1076         printk(banner);
1077
1078         err = register_pernet_device(&ipip_net_ops);
1079         if (err < 0)
1080                 return err;
1081         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1082         if (err < 0) {
1083                 pr_info("%s: can't register tunnel\n", __func__);
1084                 goto xfrm_tunnel_failed;
1085         }
1086         err = rtnl_link_register(&ipip_link_ops);
1087         if (err < 0)
1088                 goto rtnl_link_failed;
1089
1090 out:
1091         return err;
1092
1093 rtnl_link_failed:
1094         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1095 xfrm_tunnel_failed:
1096         unregister_pernet_device(&ipip_net_ops);
1097         goto out;
1098 }
1099
1100 static void __exit ipip_fini(void)
1101 {
1102         rtnl_link_unregister(&ipip_link_ops);
1103         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1104                 pr_info("%s: can't deregister tunnel\n", __func__);
1105
1106         unregister_pernet_device(&ipip_net_ops);
1107 }
1108
1109 module_init(ipip_init);
1110 module_exit(ipip_fini);
1111 MODULE_LICENSE("GPL");
1112 MODULE_ALIAS_NETDEV("tunl0");