]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blob - net/ipv4/ipip.c
618bde867ac11650c802423b13d9cb9804e6aca1
[can-eth-gw-linux.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* 16-slot hash: fold the two low nibbles of the (big-endian) address. */
120 #define HASH_SIZE  16
121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122
/*
 * Per-network-namespace tunnel state.  Tunnels are kept in four tables
 * indexed by which endpoint addresses are configured; tunnels[] lets
 * code pick a table by priority (bit1 = remote set, bit0 = local set).
 */
123 static int ipip_net_id __read_mostly;
124 struct ipip_net {
125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; /* remote and local set */
126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];   /* remote only */
127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];   /* local only */
128         struct ip_tunnel __rcu *tunnels_wc[1];          /* wildcard (fallback) */
129         struct ip_tunnel __rcu **tunnels[4];            /* the above, by prio */
130
131         struct net_device *fb_tunnel_dev;               /* the "tunl0" fallback device */
132 };
133
134 static int ipip_tunnel_init(struct net_device *dev);
135 static void ipip_tunnel_setup(struct net_device *dev);
136 static void ipip_dev_free(struct net_device *dev);
137
138 /*
139  * Locking : hash tables are protected by RCU and RTNL
140  */
141
/* Walks a hash chain under RCU; expects a local 'struct ip_tunnel *t'. */
142 #define for_each_ip_tunnel_rcu(start) \
143         for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144
145 /* often modified stats are per cpu, other are shared (netdev->stats) */
146 struct pcpu_tstats {
147         u64     rx_packets;
148         u64     rx_bytes;
149         u64     tx_packets;
150         u64     tx_bytes;
151         struct u64_stats_sync   syncp;  /* protects 64-bit reads on 32-bit hosts */
152 };
153
153
/*
 * ipip_get_stats64 - fill @tot with the device's 64-bit statistics.
 *
 * Packet/byte counters are kept per cpu and summed here under the
 * u64_stats seqcount; error counters live in the shared dev->stats
 * and are copied over directly.  Returns @tot.
 */
154 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
155                                                   struct rtnl_link_stats64 *tot)
156 {
157         int i;
158
159         for_each_possible_cpu(i) {
160                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
161                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
162                 unsigned int start;
163
                    /* retry the snapshot if a writer updated the counters meanwhile */
164                 do {
165                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
166                         rx_packets = tstats->rx_packets;
167                         tx_packets = tstats->tx_packets;
168                         rx_bytes = tstats->rx_bytes;
169                         tx_bytes = tstats->tx_bytes;
170                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
171
172                 tot->rx_packets += rx_packets;
173                 tot->tx_packets += tx_packets;
174                 tot->rx_bytes   += rx_bytes;
175                 tot->tx_bytes   += tx_bytes;
176         }
177
178         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
179         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
180         tot->tx_dropped = dev->stats.tx_dropped;
181         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
182         tot->tx_errors = dev->stats.tx_errors;
183         tot->collisions = dev->stats.collisions;
184
185         return tot;
186 }
187
/*
 * ipip_tunnel_lookup - find an UP tunnel for a packet's outer addresses.
 *
 * Tables are searched most-specific first: (remote, local) pair, then
 * remote-only, then local-only, finally the single wildcard slot.
 * Runs under RCU; returns NULL when no matching tunnel is up.
 */
188 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
189                 __be32 remote, __be32 local)
190 {
191         unsigned int h0 = HASH(remote);
192         unsigned int h1 = HASH(local);
193         struct ip_tunnel *t;
194         struct ipip_net *ipn = net_generic(net, ipip_net_id);
195
196         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
197                 if (local == t->parms.iph.saddr &&
198                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
199                         return t;
200
201         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
202                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
203                         return t;
204
205         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
206                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
207                         return t;
208
209         t = rcu_dereference(ipn->tunnels_wc[0]);
210         if (t && (t->dev->flags&IFF_UP))
211                 return t;
212         return NULL;
213 }
214
/*
 * __ipip_bucket - return the hash-chain head for a set of tunnel parms.
 *
 * prio selects the table (bit1: remote address set, bit0: local address
 * set); h is the XOR of the hashes of whichever endpoints are present,
 * matching the h0 ^ h1 probe order used by ipip_tunnel_lookup().
 */
215 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
216                 struct ip_tunnel_parm *parms)
217 {
218         __be32 remote = parms->iph.daddr;
219         __be32 local = parms->iph.saddr;
220         unsigned int h = 0;
221         int prio = 0;
222
223         if (remote) {
224                 prio |= 2;
225                 h ^= HASH(remote);
226         }
227         if (local) {
228                 prio |= 1;
229                 h ^= HASH(local);
230         }
231         return &ipn->tunnels[prio][h];
232 }
233
/* Convenience wrapper: bucket for an existing tunnel's parameters. */
234 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
235                 struct ip_tunnel *t)
236 {
237         return __ipip_bucket(ipn, &t->parms);
238 }
239
/*
 * ipip_tunnel_unlink - remove @t from its hash chain.
 * Caller holds RTNL; readers may still be traversing the chain under
 * RCU, hence the rcu_assign_pointer() splice-out.
 */
240 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
241 {
242         struct ip_tunnel __rcu **tp;
243         struct ip_tunnel *iter;
244
245         for (tp = ipip_bucket(ipn, t);
246              (iter = rtnl_dereference(*tp)) != NULL;
247              tp = &iter->next) {
248                 if (t == iter) {
249                         rcu_assign_pointer(*tp, t->next);
250                         break;
251                 }
252         }
253 }
254
/*
 * ipip_tunnel_link - insert @t at the head of its hash chain.
 * t->next must be published before *tp so concurrent RCU readers
 * never see a half-linked entry.
 */
255 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
256 {
257         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
258
259         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
260         rcu_assign_pointer(*tp, t);
261 }
262
/*
 * ipip_tunnel_locate - find a tunnel matching @parms, optionally creating it.
 *
 * With @create == 0 this is a pure lookup (RTNL held).  Otherwise a new
 * net_device is allocated (named from parms->name, or "tunl%d" if empty),
 * initialised, registered and linked into the hash tables.  Returns the
 * tunnel or NULL on lookup miss / allocation or registration failure.
 */
263 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
264                 struct ip_tunnel_parm *parms, int create)
265 {
266         __be32 remote = parms->iph.daddr;
267         __be32 local = parms->iph.saddr;
268         struct ip_tunnel *t, *nt;
269         struct ip_tunnel __rcu **tp;
270         struct net_device *dev;
271         char name[IFNAMSIZ];
272         struct ipip_net *ipn = net_generic(net, ipip_net_id);
273
274         for (tp = __ipip_bucket(ipn, parms);
275                  (t = rtnl_dereference(*tp)) != NULL;
276                  tp = &t->next) {
277                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
278                         return t;
279         }
280         if (!create)
281                 return NULL;
282
283         if (parms->name[0])
284                 strlcpy(name, parms->name, IFNAMSIZ);
285         else
286                 strcpy(name, "tunl%d");
287
288         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
289         if (dev == NULL)
290                 return NULL;
291
292         dev_net_set(dev, net);
293
294         nt = netdev_priv(dev);
295         nt->parms = *parms;
296
297         if (ipip_tunnel_init(dev) < 0)
298                 goto failed_free;
299
300         if (register_netdevice(dev) < 0)
301                 goto failed_free;
302
            /* register_netdevice() resolved any "%d" in the name; copy it back */
303         strcpy(nt->parms.name, dev->name);
304
            /* reference dropped in ipip_tunnel_uninit() */
305         dev_hold(dev);
306         ipip_tunnel_link(ipn, nt);
307         return nt;
308
309 failed_free:
310         ipip_dev_free(dev);
311         return NULL;
312 }
313
314 /* called with RTNL */
/*
 * ipip_tunnel_uninit - ndo_uninit: unhash the tunnel and drop the
 * reference taken at creation.  The fallback device only occupies the
 * wildcard slot, so it is cleared directly instead of being unlinked.
 */
315 static void ipip_tunnel_uninit(struct net_device *dev)
316 {
317         struct net *net = dev_net(dev);
318         struct ipip_net *ipn = net_generic(net, ipip_net_id);
319
320         if (dev == ipn->fb_tunnel_dev)
321                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
322         else
323                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
324         dev_put(dev);
325 }
326
/*
 * ipip_err - ICMP error handler for IPIP packets we transmitted.
 *
 * @skb holds the ICMP message; its payload starts with the outer IPv4
 * header of the offending tunnel packet.  Relevant errors (unreachable,
 * TTL exceeded, redirect, frag-needed) are mapped onto the tunnel found
 * by reversing the addresses, updating the route's PMTU / redirect
 * state or rate-tracking the error via err_count/err_time so that
 * ipip_tunnel_xmit() can propagate a link failure.
 * Returns 0, or -ENOENT when no tunnel matches.
 */
327 static int ipip_err(struct sk_buff *skb, u32 info)
328 {
329
330 /* All the routers (except for Linux) return only
331    8 bytes of packet payload. It means, that precise relaying of
332    ICMP in the real Internet is absolutely infeasible.
333  */
334         const struct iphdr *iph = (const struct iphdr *)skb->data;
335         const int type = icmp_hdr(skb)->type;
336         const int code = icmp_hdr(skb)->code;
337         struct ip_tunnel *t;
338         int err;
339
            /* First, filter out ICMP types we do not react to at all. */
340         switch (type) {
341         default:
342         case ICMP_PARAMETERPROB:
343                 return 0;
344
345         case ICMP_DEST_UNREACH:
346                 switch (code) {
347                 case ICMP_SR_FAILED:
348                 case ICMP_PORT_UNREACH:
349                         /* Impossible event. */
350                         return 0;
351                 default:
352                         /* All others are translated to HOST_UNREACH.
353                            rfc2003 contains "deep thoughts" about NET_UNREACH,
354                            I believe they are just ether pollution. --ANK
355                          */
356                         break;
357                 }
358                 break;
359         case ICMP_TIME_EXCEEDED:
360                 if (code != ICMP_EXC_TTL)
361                         return 0;
362                 break;
363         case ICMP_REDIRECT:
364                 break;
365         }
366
            /* We sent the embedded packet, so the lookup swaps src/dst. */
367         err = -ENOENT;
368         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
369         if (t == NULL)
370                 goto out;
371
372         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
373                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
373                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
375                 err = 0;
376                 goto out;
377         }
378
379         if (type == ICMP_REDIRECT) {
380                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
381                               IPPROTO_IPIP, 0);
382                 err = 0;
383                 goto out;
384         }
385
            /* NBMA tunnels (no fixed remote) cannot attribute the error. */
386         if (t->parms.iph.daddr == 0)
387                 goto out;
388
389         err = 0;
            /* TTL inherited from inner packet: TTL-exceeded is expected. */
390         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
391                 goto out;
392
            /* Count bursts of errors within IPTUNNEL_ERR_TIMEO for xmit. */
393         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
394                 t->err_count++;
395         else
396                 t->err_count = 1;
397         t->err_time = jiffies;
398 out:
399
400         return err;
401 }
402
403 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
404                                         struct sk_buff *skb)
405 {
406         struct iphdr *inner_iph = ip_hdr(skb);
407
408         if (INET_ECN_is_ce(outer_iph->tos))
409                 IP_ECN_set_ce(inner_iph);
410 }
411
/*
 * ipip_rcv - receive path: decapsulate an IPIP packet.
 *
 * Looks up the tunnel by the outer source/destination addresses; if one
 * exists, strips the outer header state (the inner IP header becomes the
 * network header), bumps per-cpu RX stats, propagates ECN and hands the
 * inner packet back to the stack via netif_rx().
 * Returns 0 when consumed, -1 to let other handlers see the packet.
 */
412 static int ipip_rcv(struct sk_buff *skb)
413 {
414         struct ip_tunnel *tunnel;
415         const struct iphdr *iph = ip_hdr(skb);
416
417         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418         if (tunnel != NULL) {
419                 struct pcpu_tstats *tstats;
420
                    /* IPsec policy check on the outer packet; drop on failure */
421                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
422                         kfree_skb(skb);
423                         return 0;
424                 }
425
426                 secpath_reset(skb);
427
                    /* the outer header becomes "link layer"; inner IP is next */
428                 skb->mac_header = skb->network_header;
429                 skb_reset_network_header(skb);
430                 skb->protocol = htons(ETH_P_IP);
431                 skb->pkt_type = PACKET_HOST;
432
433                 tstats = this_cpu_ptr(tunnel->dev->tstats);
434                 u64_stats_update_begin(&tstats->syncp);
435                 tstats->rx_packets++;
436                 tstats->rx_bytes += skb->len;
437                 u64_stats_update_end(&tstats->syncp);
438
439                 __skb_tunnel_rx(skb, tunnel->dev);
440
441                 ipip_ecn_decapsulate(iph, skb);
442
443                 netif_rx(skb);
444                 return 0;
445         }
446
447         return -1;
448 }
449
450 /*
451  *      This function assumes it is being called from dev_queue_xmit()
452  *      and that skb is filled properly by that function.
453  */
454
/*
 * ipip_tunnel_xmit - transmit path: encapsulate an IPv4 packet in IPv4.
 *
 * Routes the outer packet to the tunnel's remote endpoint (or, for an
 * NBMA tunnel with no configured remote, to the inner next hop), applies
 * DF/PMTU handling, makes headroom for the outer header, then builds and
 * pushes the outer IPv4 header and hands the skb to the IP layer via
 * __IPTUNNEL_XMIT.  Drops (with stats) on any error; always returns
 * NETDEV_TX_OK as the skb is consumed either way.
 */
455 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
456 {
457         struct ip_tunnel *tunnel = netdev_priv(dev);
458         struct pcpu_tstats *tstats;
459         const struct iphdr  *tiph = &tunnel->parms.iph;
460         u8     tos = tunnel->parms.iph.tos;
461         __be16 df = tiph->frag_off;
462         struct rtable *rt;                      /* Route to the other host */
463         struct net_device *tdev;                /* Device to other host */
464         const struct iphdr  *old_iph = ip_hdr(skb);
465         struct iphdr  *iph;                     /* Our new IP header */
466         unsigned int max_headroom;              /* The extra header space needed */
467         __be32 dst = tiph->daddr;
468         struct flowi4 fl4;
469         int    mtu;
470
            /* only IPv4 payloads can be IPIP-encapsulated */
471         if (skb->protocol != htons(ETH_P_IP))
472                 goto tx_error;
473
            /* low bit of the configured TOS means "inherit from inner header" */
474         if (tos & 1)
475                 tos = old_iph->tos;
476
477         if (!dst) {
478                 /* NBMA tunnel */
479                 if ((rt = skb_rtable(skb)) == NULL) {
480                         dev->stats.tx_fifo_errors++;
481                         goto tx_error;
482                 }
483                 dst = rt_nexthop(rt, old_iph->daddr);
484         }
485
486         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
487                                    dst, tiph->saddr,
488                                    0, 0,
489                                    IPPROTO_IPIP, RT_TOS(tos),
490                                    tunnel->parms.link);
491         if (IS_ERR(rt)) {
492                 dev->stats.tx_carrier_errors++;
493                 goto tx_error_icmp;
494         }
495         tdev = rt->dst.dev;
496
            /* route loops back into this tunnel: would recurse forever */
497         if (tdev == dev) {
498                 ip_rt_put(rt);
499                 dev->stats.collisions++;
500                 goto tx_error;
501         }
502
            /* honour DF from either the tunnel config or the inner packet */
503         df |= old_iph->frag_off & htons(IP_DF);
504
505         if (df) {
506                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
507
                    /* 68 is the minimum IPv4 MTU */
508                 if (mtu < 68) {
509                         dev->stats.collisions++;
510                         ip_rt_put(rt);
511                         goto tx_error;
512                 }
513
514                 if (skb_dst(skb))
515                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
516
                    /* DF set and packet too big: bounce an ICMP FRAG_NEEDED */
517                 if ((old_iph->frag_off & htons(IP_DF)) &&
518                     mtu < ntohs(old_iph->tot_len)) {
519                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
520                                   htonl(mtu));
521                         ip_rt_put(rt);
522                         goto tx_error;
523                 }
524         }
525
            /* replay recent ICMP errors (recorded by ipip_err) to the sender */
526         if (tunnel->err_count > 0) {
527                 if (time_before(jiffies,
528                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
529                         tunnel->err_count--;
530                         dst_link_failure(skb);
531                 } else
532                         tunnel->err_count = 0;
533         }
534
535         /*
536          * Okay, now see if we can stuff it in the buffer as-is.
537          */
538         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
539
540         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
541             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
542                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
543                 if (!new_skb) {
544                         ip_rt_put(rt);
545                         dev->stats.tx_dropped++;
546                         dev_kfree_skb(skb);
547                         return NETDEV_TX_OK;
548                 }
549                 if (skb->sk)
550                         skb_set_owner_w(new_skb, skb->sk);
551                 dev_kfree_skb(skb);
552                 skb = new_skb;
                    /* headers may have moved with the reallocated skb */
553                 old_iph = ip_hdr(skb);
554         }
555
556         skb->transport_header = skb->network_header;
557         skb_push(skb, sizeof(struct iphdr));
558         skb_reset_network_header(skb);
559         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
560         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
561                               IPSKB_REROUTED);
562         skb_dst_drop(skb);
563         skb_dst_set(skb, &rt->dst);
564
565         /*
566          *      Push down and install the IPIP header.
567          */
568
569         iph                     =       ip_hdr(skb);
570         iph->version            =       4;
571         iph->ihl                =       sizeof(struct iphdr)>>2;
572         iph->frag_off           =       df;
573         iph->protocol           =       IPPROTO_IPIP;
574         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
575         iph->daddr              =       fl4.daddr;
576         iph->saddr              =       fl4.saddr;
577
            /* TTL 0 in the config means "inherit from inner header" */
578         if ((iph->ttl = tiph->ttl) == 0)
579                 iph->ttl        =       old_iph->ttl;
580
581         nf_reset(skb);
582         tstats = this_cpu_ptr(dev->tstats);
583         __IPTUNNEL_XMIT(tstats, &dev->stats);
584         return NETDEV_TX_OK;
585
586 tx_error_icmp:
587         dst_link_failure(skb);
588 tx_error:
589         dev->stats.tx_errors++;
590         dev_kfree_skb(skb);
591         return NETDEV_TX_OK;
592 }
593
/*
 * ipip_tunnel_bind_dev - derive link parameters from the underlying device.
 *
 * Resolves the route to the tunnel's remote endpoint (if configured) to
 * find the lower device, falling back to the explicitly bound link, and
 * sizes hard_header_len/mtu to account for the outer IPv4 header.
 */
594 static void ipip_tunnel_bind_dev(struct net_device *dev)
595 {
596         struct net_device *tdev = NULL;
597         struct ip_tunnel *tunnel;
598         const struct iphdr *iph;
599
600         tunnel = netdev_priv(dev);
601         iph = &tunnel->parms.iph;
602
603         if (iph->daddr) {
604                 struct rtable *rt;
605                 struct flowi4 fl4;
606
607                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
608                                            iph->daddr, iph->saddr,
609                                            0, 0,
610                                            IPPROTO_IPIP,
611                                            RT_TOS(iph->tos),
612                                            tunnel->parms.link);
613                 if (!IS_ERR(rt)) {
614                         tdev = rt->dst.dev;
615                         ip_rt_put(rt);
616                 }
617                 dev->flags |= IFF_POINTOPOINT;
618         }
619
            /* no route found (or no remote): fall back to the bound link */
620         if (!tdev && tunnel->parms.link)
621                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
622
623         if (tdev) {
                    /* leave room for the outer IPv4 header on every packet */
624                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
625                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
626         }
627         dev->iflink = tunnel->parms.link;
628 }
629
/*
 * ipip_tunnel_ioctl - ndo_do_ioctl: legacy tunnel configuration interface.
 *
 * Handles SIOCGETTUNNEL (query parms), SIOCADDTUNNEL/SIOCCHGTUNNEL
 * (create or reconfigure, CAP_NET_ADMIN required) and SIOCDELTUNNEL
 * (destroy, CAP_NET_ADMIN required).  Parameters are exchanged with
 * userspace as a struct ip_tunnel_parm via ifr->ifr_ifru.ifru_data.
 * Called under RTNL.  Returns 0 or a negative errno.
 */
630 static int
631 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
632 {
633         int err = 0;
634         struct ip_tunnel_parm p;
635         struct ip_tunnel *t;
636         struct net *net = dev_net(dev);
637         struct ipip_net *ipn = net_generic(net, ipip_net_id);
638
639         switch (cmd) {
640         case SIOCGETTUNNEL:
641                 t = NULL;
                    /* on the fallback device, look up the tunnel the user named */
642                 if (dev == ipn->fb_tunnel_dev) {
643                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
644                                 err = -EFAULT;
645                                 break;
646                         }
647                         t = ipip_tunnel_locate(net, &p, 0);
648                 }
649                 if (t == NULL)
650                         t = netdev_priv(dev);
651                 memcpy(&p, &t->parms, sizeof(p));
652                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
653                         err = -EFAULT;
654                 break;
655
656         case SIOCADDTUNNEL:
657         case SIOCCHGTUNNEL:
658                 err = -EPERM;
659                 if (!capable(CAP_NET_ADMIN))
660                         goto done;
661
662                 err = -EFAULT;
663                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
664                         goto done;
665
                    /* outer header must be plain IPIP: v4, no options, DF only */
666                 err = -EINVAL;
667                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
668                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
669                         goto done;
                    /* a fixed TTL only makes sense with DF set (no fragmentation) */
670                 if (p.iph.ttl)
671                         p.iph.frag_off |= htons(IP_DF);
672
673                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
674
675                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
676                         if (t != NULL) {
                                    /* the new addresses already belong to another tunnel */
677                                 if (t->dev != dev) {
678                                         err = -EEXIST;
679                                         break;
680                                 }
681                         } else {
                                    /* cannot toggle point-to-point-ness of an existing dev */
682                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
683                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
684                                         err = -EINVAL;
685                                         break;
686                                 }
                                    /* re-hash this device under its new endpoints */
687                                 t = netdev_priv(dev);
688                                 ipip_tunnel_unlink(ipn, t);
689                                 synchronize_net();
690                                 t->parms.iph.saddr = p.iph.saddr;
691                                 t->parms.iph.daddr = p.iph.daddr;
692                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
693                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
694                                 ipip_tunnel_link(ipn, t);
695                                 netdev_state_change(dev);
696                         }
697                 }
698
699                 if (t) {
700                         err = 0;
701                         if (cmd == SIOCCHGTUNNEL) {
702                                 t->parms.iph.ttl = p.iph.ttl;
703                                 t->parms.iph.tos = p.iph.tos;
704                                 t->parms.iph.frag_off = p.iph.frag_off;
705                                 if (t->parms.link != p.link) {
706                                         t->parms.link = p.link;
707                                         ipip_tunnel_bind_dev(dev);
708                                         netdev_state_change(dev);
709                                 }
710                         }
                            /* report the effective parameters back to userspace */
711                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
712                                 err = -EFAULT;
713                 } else
714                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
715                 break;
716
717         case SIOCDELTUNNEL:
718                 err = -EPERM;
719                 if (!capable(CAP_NET_ADMIN))
720                         goto done;
721
722                 if (dev == ipn->fb_tunnel_dev) {
723                         err = -EFAULT;
724                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
725                                 goto done;
726                         err = -ENOENT;
727                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
728                                 goto done;
729                         err = -EPERM;
                            /* the fallback device itself cannot be deleted this way */
730                         if (t->dev == ipn->fb_tunnel_dev)
731                                 goto done;
732                         dev = t->dev;
733                 }
734                 unregister_netdevice(dev);
735                 err = 0;
736                 break;
737
738         default:
739                 err = -EINVAL;
740         }
741
742 done:
743         return err;
744 }
745
746 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
747 {
748         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
749                 return -EINVAL;
750         dev->mtu = new_mtu;
751         return 0;
752 }
753
/* Device operations for ipip tunnel netdevices. */
754 static const struct net_device_ops ipip_netdev_ops = {
755         .ndo_uninit     = ipip_tunnel_uninit,
756         .ndo_start_xmit = ipip_tunnel_xmit,
757         .ndo_do_ioctl   = ipip_tunnel_ioctl,
758         .ndo_change_mtu = ipip_tunnel_change_mtu,
759         .ndo_get_stats64 = ipip_get_stats64,
760 };
761
/* Destructor: release per-cpu stats, then the device itself. */
762 static void ipip_dev_free(struct net_device *dev)
763 {
764         free_percpu(dev->tstats);
765         free_netdev(dev);
766 }
767
/*
 * ipip_tunnel_setup - alloc_netdev() setup callback: static device
 * defaults for a tunnel device (ops, type, header/MTU sizing, flags).
 * Per-tunnel state is filled in later by ipip_tunnel_init().
 */
768 static void ipip_tunnel_setup(struct net_device *dev)
769 {
770         dev->netdev_ops         = &ipip_netdev_ops;
771         dev->destructor         = ipip_dev_free;
772
773         dev->type               = ARPHRD_TUNNEL;
774         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
775         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
776         dev->flags              = IFF_NOARP;
777         dev->iflink             = 0;
778         dev->addr_len           = 4;
779         dev->features           |= NETIF_F_NETNS_LOCAL;
780         dev->features           |= NETIF_F_LLTX;
781         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
782 }
783
/*
 * ipip_tunnel_init - per-device init: publish the tunnel endpoints as
 * the device's hardware/broadcast addresses, bind to the lower device
 * and allocate per-cpu stats.  Returns 0 or -ENOMEM.
 */
784 static int ipip_tunnel_init(struct net_device *dev)
785 {
786         struct ip_tunnel *tunnel = netdev_priv(dev);
787
788         tunnel->dev = dev;
789
790         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
791         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
792
793         ipip_tunnel_bind_dev(dev);
794
795         dev->tstats = alloc_percpu(struct pcpu_tstats);
796         if (!dev->tstats)
797                 return -ENOMEM;
798
799         return 0;
800 }
801
/*
 * ipip_fb_tunnel_init - init the per-netns fallback "tunl0" device.
 *
 * The fallback tunnel has no fixed endpoints; it is published in the
 * wildcard slot so ipip_rcv() can catch otherwise-unmatched IPIP
 * traffic.  Returns 0 or -ENOMEM.
 */
802 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
803 {
804         struct ip_tunnel *tunnel = netdev_priv(dev);
805         struct iphdr *iph = &tunnel->parms.iph;
806         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
807
808         tunnel->dev = dev;
809         strcpy(tunnel->parms.name, dev->name);
810
811         iph->version            = 4;
812         iph->protocol           = IPPROTO_IPIP;
813         iph->ihl                = 5;
814
815         dev->tstats = alloc_percpu(struct pcpu_tstats);
816         if (!dev->tstats)
817                 return -ENOMEM;
818
            /* reference dropped in ipip_tunnel_uninit() */
819         dev_hold(dev);
820         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
821         return 0;
822 }
823
/* IPPROTO_IPIP tunnel handler registered with the xfrm4 tunnel layer. */
824 static struct xfrm_tunnel ipip_handler __read_mostly = {
825         .handler        =       ipip_rcv,
826         .err_handler    =       ipip_err,
827         .priority       =       1,
828 };
829
830 static const char banner[] __initconst =
831         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
832
/*
 * ipip_destroy_tunnels - queue every hashed tunnel device in this netns
 * for unregistration.  Skips prio 0 (the wildcard/fallback slot, torn
 * down separately by the caller).  Called under RTNL.
 */
833 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
834 {
835         int prio;
836
837         for (prio = 1; prio < 4; prio++) {
838                 int h;
839                 for (h = 0; h < HASH_SIZE; h++) {
840                         struct ip_tunnel *t;
841
842                         t = rtnl_dereference(ipn->tunnels[prio][h]);
843                         while (t != NULL) {
844                                 unregister_netdevice_queue(t->dev, head);
845                                 t = rtnl_dereference(t->next);
846                         }
847                 }
848         }
849 }
850
/*
 * ipip_init_net - per-netns init: wire up the prio-indexed table array
 * and create/register the fallback "tunl0" device.
 * Returns 0 or a negative errno (allocation/registration failure).
 */
851 static int __net_init ipip_init_net(struct net *net)
852 {
853         struct ipip_net *ipn = net_generic(net, ipip_net_id);
854         struct ip_tunnel *t;
855         int err;
856
            /* index order must match the prio bits built in __ipip_bucket() */
857         ipn->tunnels[0] = ipn->tunnels_wc;
858         ipn->tunnels[1] = ipn->tunnels_l;
859         ipn->tunnels[2] = ipn->tunnels_r;
860         ipn->tunnels[3] = ipn->tunnels_r_l;
861
862         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
863                                            "tunl0",
864                                            ipip_tunnel_setup);
865         if (!ipn->fb_tunnel_dev) {
866                 err = -ENOMEM;
867                 goto err_alloc_dev;
868         }
869         dev_net_set(ipn->fb_tunnel_dev, net);
870
871         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
872         if (err)
873                 goto err_reg_dev;
874
875         if ((err = register_netdev(ipn->fb_tunnel_dev)))
876                 goto err_reg_dev;
877
878         t = netdev_priv(ipn->fb_tunnel_dev);
879
880         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
881         return 0;
882
883 err_reg_dev:
884         ipip_dev_free(ipn->fb_tunnel_dev);
885 err_alloc_dev:
886         /* nothing */
887         return err;
888 }
889
/*
 * ipip_exit_net - per-netns teardown: batch-unregister every tunnel
 * device (including the fallback) in one RTNL critical section.
 */
890 static void __net_exit ipip_exit_net(struct net *net)
891 {
892         struct ipip_net *ipn = net_generic(net, ipip_net_id);
893         LIST_HEAD(list);
894
895         rtnl_lock();
896         ipip_destroy_tunnels(ipn, &list);
897         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
898         unregister_netdevice_many(&list);
899         rtnl_unlock();
900 }
901
/* Per-network-namespace lifecycle hooks and private-data sizing. */
902 static struct pernet_operations ipip_net_ops = {
903         .init = ipip_init_net,
904         .exit = ipip_exit_net,
905         .id   = &ipip_net_id,
906         .size = sizeof(struct ipip_net),
907 };
908
/*
 * ipip_init - module entry: register per-netns state first, then claim
 * the IPPROTO_IPIP tunnel slot; unwind the pernet registration if the
 * tunnel slot is taken.  Returns 0 or a negative errno.
 */
909 static int __init ipip_init(void)
910 {
911         int err;
912
913         printk(banner);
914
915         err = register_pernet_device(&ipip_net_ops);
916         if (err < 0)
917                 return err;
918         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
919         if (err < 0) {
920                 unregister_pernet_device(&ipip_net_ops);
921                 pr_info("%s: can't register tunnel\n", __func__);
922         }
923         return err;
924 }
925
/*
 * ipip_fini - module exit: release the IPPROTO_IPIP tunnel slot, then
 * tear down all per-netns state (reverse of ipip_init()).
 */
926 static void __exit ipip_fini(void)
927 {
928         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
929                 pr_info("%s: can't deregister tunnel\n", __func__);
930
931         unregister_pernet_device(&ipip_net_ops);
932 }
933
934 module_init(ipip_init);
935 module_exit(ipip_fini);
936 MODULE_LICENSE("GPL");
937 MODULE_ALIAS_NETDEV("tunl0");