2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
39 #include <net/protocol.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
51 #if IS_ENABLED(CONFIG_IPV6)
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
61 1. The most important issue is detecting local dead loops.
62 They would cause complete host lockup in transmit, which
63 would be "resolved" by stack overflow or, if queueing is enabled,
64 with infinite looping in net_bh.
66 We cannot track such dead loops during route installation,
67 it is an infeasible task. The most general solution would be
68 to keep skb->encapsulation counter (sort of local ttl),
69 and silently drop packet when it expires. It is a good
70 solution, but it supposes maintaining new variable in ALL
71 skb, even if no tunneling is used.
73 Current solution: xmit_recursion breaks dead loops. This is a percpu
74 counter, since when we enter the first ndo_xmit(), cpu migration is
75 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
77 2. Networking dead loops would not kill routers, but would really
78 kill network. IP hop limit plays role of "t->recursion" in this case,
79 if we copy it from packet being encapsulated to upper header.
80 It is very good solution, but it introduces two problems:
82 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83 do not work over tunnels.
84 - traceroute does not work. I planned to relay ICMP from tunnel,
85 so that this problem would be solved and traceroute output
86 would be even more informative. This idea appeared to be wrong:
87 only Linux complies to rfc1812 now (yes, guys, Linux is the only
88 true router now :-)), all routers (at least, in neighbourhood of mine)
89 return only 8 bytes of payload. It is the end.
91 Hence, if we want that OSPF worked or traceroute said something reasonable,
92 we should search for another solution.
94 One of them is to parse packet trying to detect inner encapsulation
95 made by our node. It is difficult or even impossible, especially,
96 taking into account fragmentation. To be short, ttl is not a solution at all.
98 Current solution: The solution was UNEXPECTEDLY SIMPLE.
99 We force DF flag on tunnels with preconfigured hop limit,
100 that is ALL. :-) Well, it does not remove the problem completely,
101 but exponential growth of network traffic is changed to linear
102 (branches, that exceed pmtu are pruned) and tunnel mtu
103 rapidly degrades to value <68, where looping stops.
104 Yes, it is not good if there exists a router in the loop,
105 which does not force DF, even when encapsulating packets have DF set.
106 But it is not our problem! Nobody could accuse us, we made
107 all that we could make. Even if it is your gated who injected
108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-)
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
123 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
124 static int ipgre_tunnel_init(struct net_device *dev);
125 static void ipgre_tunnel_setup(struct net_device *dev);
126 static int ipgre_tunnel_bind_dev(struct net_device *dev);
128 /* Fallback tunnel: no source, no destination, no key, no options */
132 static int ipgre_net_id __read_mostly;
134 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
136 struct net_device *fb_tunnel_dev;
139 /* Tunnel hash table */
149 We require exact key match i.e. if a key is present in packet
150 it will match only tunnel with the same key; if it is not present,
151 it will match only keyless tunnel.
153 All keyless packets, if not matched against configured keyless tunnels,
154 will match the fallback tunnel.
/* Tunnel hash tables: HASH() folds an address/key into 4 bits (16 buckets).
 * Four tables index tunnels by which endpoints are configured:
 * tunnels[3] remote+local, tunnels[2] remote only, tunnels[1] local only,
 * tunnels[0] wildcard (keyed/keyless fallback).
 */
157 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
159 #define tunnels_r_l tunnels[3]
160 #define tunnels_r tunnels[2]
161 #define tunnels_l tunnels[1]
162 #define tunnels_wc tunnels[0]
164 * Locking : hash tables are protected by RCU and RTNL
/* Walk one bucket under RCU; caller must declare 'struct ip_tunnel *t'. */
167 #define for_each_ip_tunnel_rcu(start) \
168 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
170 /* often modified stats are per cpu, other are shared (netdev->stats) */
176 struct u64_stats_sync syncp;
/* ndo_get_stats64: accumulate the per-cpu rx/tx packet and byte counters
 * into @tot, reading each cpu's counters inside a u64_stats fetch loop so
 * 64-bit values are consistent on 32-bit hosts, then copy the shared
 * error counters straight from dev->stats.
 * NOTE(review): this dump omits interior lines (loop/brace structure);
 * code kept verbatim.
 */
179 static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
180 struct rtnl_link_stats64 *tot)
184 for_each_possible_cpu(i) {
185 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
186 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
190 start = u64_stats_fetch_begin_bh(&tstats->syncp);
191 rx_packets = tstats->rx_packets;
192 tx_packets = tstats->tx_packets;
193 rx_bytes = tstats->rx_bytes;
194 tx_bytes = tstats->tx_bytes;
195 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
197 tot->rx_packets += rx_packets;
198 tot->tx_packets += tx_packets;
199 tot->rx_bytes += rx_bytes;
200 tot->tx_bytes += tx_bytes;
/* Error counters are not per-cpu; take them from the shared netdev stats. */
203 tot->multicast = dev->stats.multicast;
204 tot->rx_crc_errors = dev->stats.rx_crc_errors;
205 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
206 tot->rx_length_errors = dev->stats.rx_length_errors;
207 tot->rx_errors = dev->stats.rx_errors;
208 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
209 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
210 tot->tx_dropped = dev->stats.tx_dropped;
211 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
212 tot->tx_errors = dev->stats.tx_errors;
217 /* Given src, dst and key, find appropriate for input tunnel. */
/* Receive-side tunnel lookup: given the outer saddr/daddr, GRE key and
 * inner protocol, find the best matching tunnel.  Four passes over
 * progressively less specific hash tables (remote+local, remote-only,
 * local/multicast, wildcard); within a pass, a device whose type and
 * bound link both match wins outright, otherwise the lowest-scored
 * candidate so far is remembered.  Falls back to the per-netns fallback
 * device if it is up.  Runs under RCU.
 * NOTE(review): dump omits the continue/score lines inside each loop;
 * code kept verbatim.
 */
219 static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
220 __be32 remote, __be32 local,
221 __be32 key, __be16 gre_proto)
223 struct net *net = dev_net(dev);
224 int link = dev->ifindex;
225 unsigned int h0 = HASH(remote);
226 unsigned int h1 = HASH(key);
227 struct ip_tunnel *t, *cand = NULL;
228 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
/* ETH_P_TEB payload means a gretap (Ethernet) device is wanted. */
229 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
230 ARPHRD_ETHER : ARPHRD_IPGRE;
231 int score, cand_score = 4;
/* Pass 1: tunnels with both remote and local configured. */
233 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
234 if (local != t->parms.iph.saddr ||
235 remote != t->parms.iph.daddr ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
245 if (t->parms.link != link)
247 if (t->dev->type != dev_type)
252 if (score < cand_score) {
/* Pass 2: remote-only tunnels. */
258 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
259 if (remote != t->parms.iph.daddr ||
260 key != t->parms.i_key ||
261 !(t->dev->flags & IFF_UP))
264 if (t->dev->type != ARPHRD_IPGRE &&
265 t->dev->type != dev_type)
269 if (t->parms.link != link)
271 if (t->dev->type != dev_type)
276 if (score < cand_score) {
/* Pass 3: local-only tunnels; a multicast daddr also matches as "local". */
282 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
283 if ((local != t->parms.iph.saddr &&
284 (local != t->parms.iph.daddr ||
285 !ipv4_is_multicast(local))) ||
286 key != t->parms.i_key ||
287 !(t->dev->flags & IFF_UP))
290 if (t->dev->type != ARPHRD_IPGRE &&
291 t->dev->type != dev_type)
295 if (t->parms.link != link)
297 if (t->dev->type != dev_type)
302 if (score < cand_score) {
/* Pass 4: wildcard tunnels, matched by key only. */
308 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
309 if (t->parms.i_key != key ||
310 !(t->dev->flags & IFF_UP))
313 if (t->dev->type != ARPHRD_IPGRE &&
314 t->dev->type != dev_type)
318 if (t->parms.link != link)
320 if (t->dev->type != dev_type)
325 if (score < cand_score) {
/* Last resort: the netns fallback device, if administratively up. */
334 dev = ign->fb_tunnel_dev;
335 if (dev->flags & IFF_UP)
336 return netdev_priv(dev);
/* Return the hash-table bucket for @parms: the table index (prio) depends
 * on which of remote/local are set, the slot on HASH(key) (and HASH of
 * the addresses — interior lines missing from this dump).
 */
341 static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
342 struct ip_tunnel_parm *parms)
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
347 unsigned int h = HASH(key);
/* A multicast remote is treated as local/broadcast, not remote. */
352 if (remote && !ipv4_is_multicast(remote)) {
357 return &ign->tunnels[prio][h];
/* Convenience wrapper: bucket for an existing tunnel's own parameters. */
360 static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
363 return __ipgre_bucket(ign, &t->parms);
/* Insert @t at the head of its hash bucket.  Caller holds RTNL;
 * rcu_assign_pointer publishes the new head to RCU readers.
 */
366 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
368 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
370 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
371 rcu_assign_pointer(*tp, t);
/* Remove @t from its hash bucket by walking the chain until the slot
 * pointing at it is found.  Caller holds RTNL.
 */
374 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
376 struct ip_tunnel __rcu **tp;
377 struct ip_tunnel *iter;
379 for (tp = ipgre_bucket(ign, t);
380 (iter = rtnl_dereference(*tp)) != NULL;
383 rcu_assign_pointer(*tp, t->next);
/* Exact-match configuration lookup: find a tunnel whose saddr, daddr,
 * i_key, link and device type all equal @parms/@type.  RTNL held;
 * returns the match or NULL (return path outside this dump).
 */
389 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
390 struct ip_tunnel_parm *parms,
393 __be32 remote = parms->iph.daddr;
394 __be32 local = parms->iph.saddr;
395 __be32 key = parms->i_key;
396 int link = parms->link;
398 struct ip_tunnel __rcu **tp;
399 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
401 for (tp = __ipgre_bucket(ign, parms);
402 (t = rtnl_dereference(*tp)) != NULL;
404 if (local == t->parms.iph.saddr &&
405 remote == t->parms.iph.daddr &&
406 key == t->parms.i_key &&
407 link == t->parms.link &&
408 type == t->dev->type)
/* Find the tunnel matching @parms; if none exists and @create is set,
 * allocate a new "gre%d" (or user-named) netdevice, bind it, register
 * it and link it into the hash table.  RTNL held.
 * NOTE(review): error-unwind lines are missing from this dump.
 */
414 static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
415 struct ip_tunnel_parm *parms, int create)
417 struct ip_tunnel *t, *nt;
418 struct net_device *dev;
420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
422 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
427 strlcpy(name, parms->name, IFNAMSIZ);
429 strcpy(name, "gre%d");
431 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
435 dev_net_set(dev, net);
437 nt = netdev_priv(dev);
439 dev->rtnl_link_ops = &ipgre_link_ops;
441 dev->mtu = ipgre_tunnel_bind_dev(dev);
443 if (register_netdevice(dev) < 0)
446 /* Can use a lockless transmit, unless we generate output sequences */
447 if (!(nt->parms.o_flags & GRE_SEQ))
448 dev->features |= NETIF_F_LLTX;
451 ipgre_tunnel_link(ign, nt);
/* ndo_uninit: drop the tunnel from the per-netns hash table when its
 * device is being unregistered.
 */
459 static void ipgre_tunnel_uninit(struct net_device *dev)
461 struct net *net = dev_net(dev);
462 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
464 ipgre_tunnel_unlink(ign, netdev_priv(dev));
/* ICMP error handler for GRE: @skb carries the returned outer IP header
 * plus (at least) 8 bytes of the offending GRE packet.  Parses the
 * embedded GRE flags, filters out error types we cannot act on, looks up
 * the originating tunnel, and applies PMTU/redirect updates or rate-
 * limited error state on it.
 * NOTE(review): switch/return glue lines are missing from this dump.
 */
469 static void ipgre_err(struct sk_buff *skb, u32 info)
472 /* All the routers (except for Linux) return only
473 8 bytes of packet payload. It means, that precise relaying of
474 ICMP in the real Internet is absolutely infeasible.
476 Moreover, Cisco "wise men" put GRE key to the third word
477 in GRE header. It makes impossible maintaining even soft state for keyed
478 GRE tunnels with enabled checksum. Tell them "thank you".
480 Well, I wonder, rfc1812 was written by Cisco employee,
481 what the hell these idiots break standards established
485 const struct iphdr *iph = (const struct iphdr *)skb->data;
486 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
487 int grehlen = (iph->ihl<<2) + 4;
488 const int type = icmp_hdr(skb)->type;
489 const int code = icmp_hdr(skb)->code;
/* Reject GRE versions/routing we don't handle; optional fields grow grehlen. */
494 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
495 if (flags&(GRE_VERSION|GRE_ROUTING))
504 /* If only 8 bytes returned, keyed message will be dropped here */
505 if (skb_headlen(skb) < grehlen)
510 case ICMP_PARAMETERPROB:
513 case ICMP_DEST_UNREACH:
516 case ICMP_PORT_UNREACH:
517 /* Impossible event. */
520 /* All others are translated to HOST_UNREACH.
521 rfc2003 contains "deep thoughts" about NET_UNREACH,
522 I believe they are just ether pollution. --ANK
527 case ICMP_TIME_EXCEEDED:
528 if (code != ICMP_EXC_TTL)
/* The embedded packet's daddr/saddr are swapped for the reverse lookup;
 * the key (if present) sits at the end of the parsed GRE header. */
537 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
539 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
544 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
545 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
546 t->parms.link, 0, IPPROTO_GRE, 0);
549 if (type == ICMP_REDIRECT) {
550 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
/* No stable peer (NBMA/multicast) — nothing useful to record. */
554 if (t->parms.iph.daddr == 0 ||
555 ipv4_is_multicast(t->parms.iph.daddr))
558 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
/* Rate-limit error state updates to one per IPTUNNEL_ERR_TIMEO. */
561 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
565 t->err_time = jiffies;
/* Propagate outer-header ECN: if the outer IP header is marked CE,
 * set CE on the inner IPv4 or IPv6 header as well (RFC 6040 style).
 */
570 static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
572 if (INET_ECN_is_ce(iph->tos)) {
573 if (skb->protocol == htons(ETH_P_IP)) {
574 IP_ECN_set_ce(ip_hdr(skb));
575 } else if (skb->protocol == htons(ETH_P_IPV6)) {
576 IP6_ECN_set_ce(ipv6_hdr(skb));
/* Build the outer TOS for encapsulation: combine the configured @tos with
 * the ECN bits of the inner IPv4/IPv6 header via INET_ECN_encapsulate().
 */
582 ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
585 if (skb->protocol == htons(ETH_P_IP))
586 inner = old_iph->tos;
587 else if (skb->protocol == htons(ETH_P_IPV6))
588 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
589 return INET_ECN_encapsulate(tos, inner);
/* GRE receive path: parse the GRE header (flags, optional checksum, key
 * and sequence number), find the matching tunnel, strip the header,
 * validate checksum/sequence against the tunnel config, update per-cpu
 * stats and hand the inner packet back to the stack.  Sends
 * port-unreachable if no tunnel matches.
 * NOTE(review): drop/goto/label lines are missing from this dump; code
 * kept verbatim.
 */
592 static int ipgre_rcv(struct sk_buff *skb)
594 const struct iphdr *iph;
600 struct ip_tunnel *tunnel;
/* Need at least base GRE header plus optional words to parse safely. */
604 if (!pskb_may_pull(skb, 16))
609 flags = *(__be16 *)h;
611 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
612 /* - Version must be 0.
613 - We do not support routing headers.
615 if (flags&(GRE_VERSION|GRE_ROUTING))
618 if (flags&GRE_CSUM) {
619 switch (skb->ip_summed) {
620 case CHECKSUM_COMPLETE:
621 csum = csum_fold(skb->csum);
627 csum = __skb_checksum_complete(skb);
628 skb->ip_summed = CHECKSUM_COMPLETE;
633 key = *(__be32 *)(h + offset);
637 seqno = ntohl(*(__be32 *)(h + offset));
642 gre_proto = *(__be16 *)(h + 2);
645 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
646 iph->saddr, iph->daddr, key,
648 struct pcpu_tstats *tstats;
652 skb->protocol = gre_proto;
653 /* WCCP version 1 and 2 protocol decoding.
654 * - Change protocol to IP
655 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
657 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
658 skb->protocol = htons(ETH_P_IP);
/* 0x4X = IPv4 version nibble; otherwise assume the WCCPv2 redirect word. */
659 if ((*(h + offset) & 0xF0) != 0x40)
663 skb->mac_header = skb->network_header;
664 __pskb_pull(skb, offset);
665 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
666 skb->pkt_type = PACKET_HOST;
667 #ifdef CONFIG_NET_IPGRE_BROADCAST
668 if (ipv4_is_multicast(iph->daddr)) {
669 /* Looped back packet, drop it! */
670 if (rt_is_output_route(skb_rtable(skb)))
672 tunnel->dev->stats.multicast++;
673 skb->pkt_type = PACKET_BROADCAST;
/* Checksum present-but-bad, or required-but-absent: count as CRC error. */
677 if (((flags&GRE_CSUM) && csum) ||
678 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
679 tunnel->dev->stats.rx_crc_errors++;
680 tunnel->dev->stats.rx_errors++;
/* Enforce in-order delivery when the tunnel expects sequence numbers. */
683 if (tunnel->parms.i_flags&GRE_SEQ) {
684 if (!(flags&GRE_SEQ) ||
685 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
686 tunnel->dev->stats.rx_fifo_errors++;
687 tunnel->dev->stats.rx_errors++;
690 tunnel->i_seqno = seqno + 1;
693 /* Warning: All skb pointers will be invalidated! */
694 if (tunnel->dev->type == ARPHRD_ETHER) {
695 if (!pskb_may_pull(skb, ETH_HLEN)) {
696 tunnel->dev->stats.rx_length_errors++;
697 tunnel->dev->stats.rx_errors++;
702 skb->protocol = eth_type_trans(skb, tunnel->dev);
703 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
706 tstats = this_cpu_ptr(tunnel->dev->tstats);
707 u64_stats_update_begin(&tstats->syncp);
708 tstats->rx_packets++;
709 tstats->rx_bytes += skb->len;
710 u64_stats_update_end(&tstats->syncp);
712 __skb_tunnel_rx(skb, tunnel->dev);
714 skb_reset_network_header(skb);
715 ipgre_ecn_decapsulate(iph, skb);
/* No tunnel matched: tell the sender per RFC 1812 semantics. */
722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/* GRE transmit path: resolve the outer destination (static peer, or NBMA
 * resolution from the inner IPv4/IPv6 destination), route the outer
 * packet, perform PMTU checks and ICMP feedback, ensure headroom,
 * prepend the outer IP + GRE headers (with optional seq/key/csum words),
 * and hand off to IP output via __IPTUNNEL_XMIT.
 * NOTE(review): many glue lines (labels, returns, braces) are missing
 * from this dump; code kept verbatim.
 */
731 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
733 struct ip_tunnel *tunnel = netdev_priv(dev);
734 struct pcpu_tstats *tstats;
735 const struct iphdr *old_iph = ip_hdr(skb);
736 const struct iphdr *tiph;
740 struct rtable *rt; /* Route to the other host */
741 struct net_device *tdev; /* Device to other host */
742 struct iphdr *iph; /* Our new IP header */
743 unsigned int max_headroom; /* The extra header space needed */
/* GRE checksumming needs the full payload checksum resolved first. */
748 if (skb->ip_summed == CHECKSUM_PARTIAL &&
749 skb_checksum_help(skb))
752 if (dev->type == ARPHRD_ETHER)
753 IPCB(skb)->flags = 0;
/* header_ops path: the outer header template was already pushed by
 * ipgre_header(); otherwise use the tunnel's configured outer header. */
755 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
757 tiph = (const struct iphdr *)skb->data;
759 gre_hlen = tunnel->hlen;
760 tiph = &tunnel->parms.iph;
/* NBMA tunnel (no fixed peer): derive dst from the inner packet's route. */
763 if ((dst = tiph->daddr) == 0) {
766 if (skb_dst(skb) == NULL) {
767 dev->stats.tx_fifo_errors++;
771 if (skb->protocol == htons(ETH_P_IP)) {
772 rt = skb_rtable(skb);
773 dst = rt_nexthop(rt, old_iph->daddr);
775 #if IS_ENABLED(CONFIG_IPV6)
776 else if (skb->protocol == htons(ETH_P_IPV6)) {
777 const struct in6_addr *addr6;
778 struct neighbour *neigh;
779 bool do_tx_error_icmp;
782 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
786 addr6 = (const struct in6_addr *)&neigh->primary_key;
787 addr_type = ipv6_addr_type(addr6);
789 if (addr_type == IPV6_ADDR_ANY) {
790 addr6 = &ipv6_hdr(skb)->daddr;
791 addr_type = ipv6_addr_type(addr6);
/* Only v4-compatible IPv6 addresses embed a usable IPv4 next hop. */
794 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
795 do_tx_error_icmp = true;
797 do_tx_error_icmp = false;
798 dst = addr6->s6_addr32[3];
800 neigh_release(neigh);
801 if (do_tx_error_icmp)
/* tos==1 convention (lines missing here): inherit TOS from inner packet. */
812 if (skb->protocol == htons(ETH_P_IP))
814 else if (skb->protocol == htons(ETH_P_IPV6))
815 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
818 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
819 tunnel->parms.o_key, RT_TOS(tos),
822 dev->stats.tx_carrier_errors++;
/* Routing back out our own device would loop: count as collision. */
829 dev->stats.collisions++;
835 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
837 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
840 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
842 if (skb->protocol == htons(ETH_P_IP)) {
843 df |= (old_iph->frag_off&htons(IP_DF));
/* Inner packet has DF and exceeds path MTU: reflect frag-needed. */
845 if ((old_iph->frag_off&htons(IP_DF)) &&
846 mtu < ntohs(old_iph->tot_len)) {
847 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
852 #if IS_ENABLED(CONFIG_IPV6)
853 else if (skb->protocol == htons(ETH_P_IPV6)) {
854 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
856 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
857 if ((tunnel->parms.iph.daddr &&
858 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
859 rt6->rt6i_dst.plen == 128) {
860 rt6->rt6i_flags |= RTF_MODIFIED;
861 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
865 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
866 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
/* Replay a recent ICMP-reported link failure back to the sender. */
873 if (tunnel->err_count > 0) {
874 if (time_before(jiffies,
875 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
878 dst_link_failure(skb);
880 tunnel->err_count = 0;
883 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
/* Reallocate if there is no room for the outer headers or the skb is shared. */
885 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
886 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
887 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
888 if (max_headroom > dev->needed_headroom)
889 dev->needed_headroom = max_headroom;
892 dev->stats.tx_dropped++;
897 skb_set_owner_w(new_skb, skb->sk);
900 old_iph = ip_hdr(skb);
903 skb_reset_transport_header(skb);
904 skb_push(skb, gre_hlen);
905 skb_reset_network_header(skb);
906 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
907 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
910 skb_dst_set(skb, &rt->dst);
913 * Push down and install the IPIP header.
918 iph->ihl = sizeof(struct iphdr) >> 2;
920 iph->protocol = IPPROTO_GRE;
921 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
922 iph->daddr = fl4.daddr;
923 iph->saddr = fl4.saddr;
/* Outer TTL: configured value, else inherited from inner, else route default. */
925 if ((iph->ttl = tiph->ttl) == 0) {
926 if (skb->protocol == htons(ETH_P_IP))
927 iph->ttl = old_iph->ttl;
928 #if IS_ENABLED(CONFIG_IPV6)
929 else if (skb->protocol == htons(ETH_P_IPV6))
930 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
933 iph->ttl = ip4_dst_hoplimit(&rt->dst);
/* GRE base header: flags word then protocol (TEB for Ethernet payload). */
936 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
937 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
938 htons(ETH_P_TEB) : skb->protocol;
/* Optional words are filled back-to-front from the end of the GRE header. */
940 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
941 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
943 if (tunnel->parms.o_flags&GRE_SEQ) {
945 *ptr = htonl(tunnel->o_seqno);
948 if (tunnel->parms.o_flags&GRE_KEY) {
949 *ptr = tunnel->parms.o_key;
952 if (tunnel->parms.o_flags&GRE_CSUM) {
954 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
959 tstats = this_cpu_ptr(dev->tstats);
960 __IPTUNNEL_XMIT(tstats, &dev->stats);
963 #if IS_ENABLED(CONFIG_IPV6)
965 dst_link_failure(skb);
968 dev->stats.tx_errors++;
/* Bind the tunnel to its underlying output device: route toward the
 * configured peer (or fall back to parms.link) to size needed_headroom
 * and compute the device MTU; also precompute the GRE header length
 * (base 4 bytes + 4 per enabled CSUM/KEY/SEQ option) into tunnel->hlen.
 * Returns the computed MTU (return statement outside this dump).
 */
973 static int ipgre_tunnel_bind_dev(struct net_device *dev)
975 struct net_device *tdev = NULL;
976 struct ip_tunnel *tunnel;
977 const struct iphdr *iph;
978 int hlen = LL_MAX_HEADER;
979 int mtu = ETH_DATA_LEN;
/* Base overhead: outer IP header + 4-byte GRE base header. */
980 int addend = sizeof(struct iphdr) + 4;
982 tunnel = netdev_priv(dev);
983 iph = &tunnel->parms.iph;
985 /* Guess output device to choose reasonable mtu and needed_headroom */
991 rt = ip_route_output_gre(dev_net(dev), &fl4,
992 iph->daddr, iph->saddr,
1001 if (dev->type != ARPHRD_ETHER)
1002 dev->flags |= IFF_POINTOPOINT;
1005 if (!tdev && tunnel->parms.link)
1006 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
1009 hlen = tdev->hard_header_len + tdev->needed_headroom;
1012 dev->iflink = tunnel->parms.link;
1014 /* Precalculate GRE options length */
1015 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1016 if (tunnel->parms.o_flags&GRE_CSUM)
1018 if (tunnel->parms.o_flags&GRE_KEY)
1020 if (tunnel->parms.o_flags&GRE_SEQ)
1023 dev->needed_headroom = addend + hlen;
1024 mtu -= dev->hard_header_len + addend;
1029 tunnel->hlen = addend;
/* ndo_do_ioctl: legacy SIOCGETTUNNEL/SIOCADDTUNNEL/SIOCCHGTUNNEL/
 * SIOCDELTUNNEL interface.  Copies ip_tunnel_parm to/from userspace,
 * validates it, and creates/changes/deletes tunnels accordingly.
 * Privileged operations require CAP_NET_ADMIN.
 * NOTE(review): switch/case, goto and return lines are missing from this
 * dump; code kept verbatim.
 */
1035 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1038 struct ip_tunnel_parm p;
1039 struct ip_tunnel *t;
1040 struct net *net = dev_net(dev);
1041 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
/* GET on the fallback device looks up by the user-supplied parms;
 * on a real tunnel device it reports that device's own parms. */
1046 if (dev == ign->fb_tunnel_dev) {
1047 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1051 t = ipgre_tunnel_locate(net, &p, 0);
1054 t = netdev_priv(dev);
1055 memcpy(&p, &t->parms, sizeof(p));
1056 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1063 if (!capable(CAP_NET_ADMIN))
1067 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
/* Sanity-check the outer header template and reject unsupported GRE bits. */
1071 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1072 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1073 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1076 p.iph.frag_off |= htons(IP_DF);
/* Keys are only meaningful when the KEY flag is set. */
1078 if (!(p.i_flags&GRE_KEY))
1080 if (!(p.o_flags&GRE_KEY))
1083 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1085 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1087 if (t->dev != dev) {
1092 unsigned int nflags = 0;
1094 t = netdev_priv(dev);
1096 if (ipv4_is_multicast(p.iph.daddr))
1097 nflags = IFF_BROADCAST;
1098 else if (p.iph.daddr)
1099 nflags = IFF_POINTOPOINT;
/* Cannot flip a device between p2p and broadcast via CHG. */
1101 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
/* Endpoint/key changes move the tunnel to a different hash bucket. */
1105 ipgre_tunnel_unlink(ign, t);
1107 t->parms.iph.saddr = p.iph.saddr;
1108 t->parms.iph.daddr = p.iph.daddr;
1109 t->parms.i_key = p.i_key;
1110 t->parms.o_key = p.o_key;
1111 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1112 memcpy(dev->broadcast, &p.iph.daddr, 4);
1113 ipgre_tunnel_link(ign, t);
1114 netdev_state_change(dev);
1120 if (cmd == SIOCCHGTUNNEL) {
1121 t->parms.iph.ttl = p.iph.ttl;
1122 t->parms.iph.tos = p.iph.tos;
1123 t->parms.iph.frag_off = p.iph.frag_off;
1124 if (t->parms.link != p.link) {
1125 t->parms.link = p.link;
1126 dev->mtu = ipgre_tunnel_bind_dev(dev);
1127 netdev_state_change(dev);
1130 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1133 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1138 if (!capable(CAP_NET_ADMIN))
/* DEL via the fallback device deletes by parms; the fallback itself
 * may never be deleted. */
1141 if (dev == ign->fb_tunnel_dev) {
1143 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1146 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1149 if (t == netdev_priv(ign->fb_tunnel_dev))
1153 unregister_netdevice(dev);
/* ndo_change_mtu: bound the new MTU by the maximum IP payload (0xFFF8)
 * minus link-layer and GRE header overhead (lower bound and assignment
 * are outside this dump).
 */
1165 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1167 struct ip_tunnel *tunnel = netdev_priv(dev);
1169 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1175 /* Nice toy. Unfortunately, useless in real life :-)
1176 It allows to construct virtual multiprotocol broadcast "LAN"
1177 over the Internet, provided multicast routing is tuned.
1180 I have no idea whether this bicycle was invented before me,
1181 so that I had to set ARPHRD_IPGRE to a random value.
1182 I have an impression, that Cisco could make something similar,
1183 but this feature is apparently missing in IOS<=11.2(8).
1185 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1186 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1188 ping -t 255 224.66.66.66
1190 If nobody answers, mbone does not work.
1192 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1193 ip addr add 10.66.66.<somewhat>/24 dev Universe
1194 ifconfig Universe up
1195 ifconfig Universe add fe80::<Your_real_addr>/10
1196 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1199 ftp fec0:6666:6666::193.233.7.65
/* header_ops->create for broadcast GRE: prepend the tunnel's outer IP
 * header template plus the GRE flags/protocol words, then fill in the
 * source and (if given) destination "hardware" addresses, which for
 * this device type are IPv4 addresses.
 */
1204 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1205 unsigned short type,
1206 const void *daddr, const void *saddr, unsigned int len)
1208 struct ip_tunnel *t = netdev_priv(dev);
1209 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1210 __be16 *p = (__be16 *)(iph+1);
1212 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1213 p[0] = t->parms.o_flags;
1217 * Set the source hardware address.
1221 memcpy(&iph->saddr, saddr, 4);
1223 memcpy(&iph->daddr, daddr, 4);
/* header_ops->parse: report the sender's "hardware" address, i.e. the
 * outer IPv4 source address of the received tunnel packet.
 */
1230 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1232 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1233 memcpy(haddr, &iph->saddr, 4);
/* Link-layer header ops used by broadcast-capable GRE devices. */
1237 static const struct header_ops ipgre_header_ops = {
1238 .create = ipgre_header,
1239 .parse = ipgre_header_parse,
1242 #ifdef CONFIG_NET_IPGRE_BROADCAST
/* ndo_open (CONFIG_NET_IPGRE_BROADCAST): for a multicast-destination
 * tunnel, verify the multicast group is routable from our source
 * address and join it on the resolved device, remembering the ifindex
 * in t->mlink for the matching leave in ipgre_close().
 */
1243 static int ipgre_open(struct net_device *dev)
1245 struct ip_tunnel *t = netdev_priv(dev);
1247 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1251 rt = ip_route_output_gre(dev_net(dev), &fl4,
1255 RT_TOS(t->parms.iph.tos),
1258 return -EADDRNOTAVAIL;
1261 if (__in_dev_get_rtnl(dev) == NULL)
1262 return -EADDRNOTAVAIL;
1263 t->mlink = dev->ifindex;
1264 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
/* ndo_stop: leave the multicast group joined in ipgre_open(), looked up
 * via the saved t->mlink ifindex.
 */
1269 static int ipgre_close(struct net_device *dev)
1271 struct ip_tunnel *t = netdev_priv(dev);
1273 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1274 struct in_device *in_dev;
1275 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1277 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
/* netdev ops for plain (layer-3) GRE tunnel devices. */
1284 static const struct net_device_ops ipgre_netdev_ops = {
1285 .ndo_init = ipgre_tunnel_init,
1286 .ndo_uninit = ipgre_tunnel_uninit,
1287 #ifdef CONFIG_NET_IPGRE_BROADCAST
1288 .ndo_open = ipgre_open,
1289 .ndo_stop = ipgre_close,
1291 .ndo_start_xmit = ipgre_tunnel_xmit,
1292 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1293 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1294 .ndo_get_stats64 = ipgre_get_stats64,
/* Device destructor: release the per-cpu stats allocated at init time
 * (remaining free call outside this dump).
 */
1297 static void ipgre_dev_free(struct net_device *dev)
1299 free_percpu(dev->tstats);
/* Offload features advertised by GRE devices (continuation lines missing
 * from this dump).
 */
1303 #define GRE_FEATURES (NETIF_F_SG | \
1304 NETIF_F_FRAGLIST | \
/* alloc_netdev setup callback for layer-3 GRE devices: install ops,
 * ARPHRD_IPGRE type, default MTU/headroom accounting for the outer
 * IP + 4-byte GRE header, NOARP, netns-local restriction and offload
 * feature flags.
 */
1308 static void ipgre_tunnel_setup(struct net_device *dev)
1310 dev->netdev_ops = &ipgre_netdev_ops;
1311 dev->destructor = ipgre_dev_free;
1313 dev->type = ARPHRD_IPGRE;
1314 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1315 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1316 dev->flags = IFF_NOARP;
1319 dev->features |= NETIF_F_NETNS_LOCAL;
1320 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1322 dev->features |= GRE_FEATURES;
1323 dev->hw_features |= GRE_FEATURES;
/* ndo_init for layer-3 GRE devices: record the device name in parms,
 * expose the configured endpoints as dev_addr/broadcast, select
 * broadcast vs point-to-point header_ops, and allocate per-cpu stats.
 */
1326 static int ipgre_tunnel_init(struct net_device *dev)
1328 struct ip_tunnel *tunnel;
1331 tunnel = netdev_priv(dev);
1332 iph = &tunnel->parms.iph;
1335 strcpy(tunnel->parms.name, dev->name);
1337 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1338 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1341 #ifdef CONFIG_NET_IPGRE_BROADCAST
1342 if (ipv4_is_multicast(iph->daddr)) {
1345 dev->flags = IFF_BROADCAST;
1346 dev->header_ops = &ipgre_header_ops;
1350 dev->header_ops = &ipgre_header_ops;
1352 dev->tstats = alloc_percpu(struct pcpu_tstats);
/* Initialize the per-netns fallback "gre0" device: zero endpoints, GRE
 * protocol in the outer header template, base 8-byte (IP+GRE) overhead.
 */
1359 static void ipgre_fb_tunnel_init(struct net_device *dev)
1361 struct ip_tunnel *tunnel = netdev_priv(dev);
1362 struct iphdr *iph = &tunnel->parms.iph;
1365 strcpy(tunnel->parms.name, dev->name);
1368 iph->protocol = IPPROTO_GRE;
1370 tunnel->hlen = sizeof(struct iphdr) + 4;
/* GRE protocol hooks registered with the IP stack: receive and ICMP
 * error handlers.
 */
1376 static const struct gre_protocol ipgre_protocol = {
1377 .handler = ipgre_rcv,
1378 .err_handler = ipgre_err,
/* Queue every tunnel in all 4 hash tables for unregistration on @head
 * (batched teardown during netns exit).  RTNL held.
 */
1381 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1385 for (prio = 0; prio < 4; prio++) {
1387 for (h = 0; h < HASH_SIZE; h++) {
1388 struct ip_tunnel *t;
1390 t = rtnl_dereference(ign->tunnels[prio][h]);
1393 unregister_netdevice_queue(t->dev, head);
1394 t = rtnl_dereference(t->next);
/* Per-netns init: create, initialize and register the fallback "gre0"
 * device and publish it in the wildcard hash slot.  On registration
 * failure the device is freed via ipgre_dev_free (error-path labels
 * missing from this dump).
 */
1400 static int __net_init ipgre_init_net(struct net *net)
1402 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1405 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1406 ipgre_tunnel_setup);
1407 if (!ign->fb_tunnel_dev) {
1411 dev_net_set(ign->fb_tunnel_dev, net);
1413 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1414 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1416 if ((err = register_netdev(ign->fb_tunnel_dev)))
1419 rcu_assign_pointer(ign->tunnels_wc[0],
1420 netdev_priv(ign->fb_tunnel_dev));
1424 ipgre_dev_free(ign->fb_tunnel_dev);
/* Per-netns exit: batch-unregister every tunnel device in this netns. */
1429 static void __net_exit ipgre_exit_net(struct net *net)
1431 struct ipgre_net *ign;
1434 ign = net_generic(net, ipgre_net_id);
1436 ipgre_destroy_tunnels(ign, &list);
1437 unregister_netdevice_many(&list);
/* pernet registration: one struct ipgre_net per network namespace. */
1441 static struct pernet_operations ipgre_net_ops = {
1442 .init = ipgre_init_net,
1443 .exit = ipgre_exit_net,
1444 .id = &ipgre_net_id,
1445 .size = sizeof(struct ipgre_net),
/* rtnl_link_ops->validate for layer-3 GRE: reject netlink configs that
 * request unsupported GRE bits (version != 0 or routing) in either
 * direction's flags.
 */
1448 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1456 if (data[IFLA_GRE_IFLAGS])
1457 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1458 if (data[IFLA_GRE_OFLAGS])
1459 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1460 if (flags & (GRE_VERSION|GRE_ROUTING))
/* rtnl_link_ops->validate for gretap: additionally check that any
 * supplied MAC address is well-formed and (checks partially outside
 * this dump) that the remote endpoint is sane, then delegate to the
 * common GRE validation.
 */
1466 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1470 if (tb[IFLA_ADDRESS]) {
1471 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1473 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1474 return -EADDRNOTAVAIL;
1480 if (data[IFLA_GRE_REMOTE]) {
1481 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1487 return ipgre_tunnel_validate(tb, data);
/* Translate IFLA_GRE_* netlink attributes into an ip_tunnel_parm.
 * Unset attributes leave the zeroed defaults; PMTU discovery (outer DF)
 * defaults to on unless IFLA_GRE_PMTUDISC is present and zero.
 */
1490 static void ipgre_netlink_parms(struct nlattr *data[],
1491 struct ip_tunnel_parm *parms)
1493 memset(parms, 0, sizeof(*parms));
1495 parms->iph.protocol = IPPROTO_GRE;
1500 if (data[IFLA_GRE_LINK])
1501 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1503 if (data[IFLA_GRE_IFLAGS])
1504 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1506 if (data[IFLA_GRE_OFLAGS])
1507 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1509 if (data[IFLA_GRE_IKEY])
1510 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1512 if (data[IFLA_GRE_OKEY])
1513 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1515 if (data[IFLA_GRE_LOCAL])
1516 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1518 if (data[IFLA_GRE_REMOTE])
1519 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1521 if (data[IFLA_GRE_TTL])
1522 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1524 if (data[IFLA_GRE_TOS])
1525 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1527 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1528 parms->iph.frag_off = htons(IP_DF);
/*
 * ndo_init for gretap devices: record the device name in the tunnel
 * parameters, bind to the underlying device/route, and allocate the
 * per-cpu stats block.
 * NOTE(review): elided lines — the -ENOMEM check on alloc_percpu()
 * and the return statements are not visible in this chunk.
 */
1531 static int ipgre_tap_init(struct net_device *dev)
1533 	struct ip_tunnel *tunnel;
1535 	tunnel = netdev_priv(dev);
/* Tunnel is looked up by name in ioctl paths; keep it in sync. */
1538 	strcpy(tunnel->parms.name, dev->name);
/* Resolve the egress device and derive header length / MTU. */
1540 	ipgre_tunnel_bind_dev(dev);
/* Lockless per-cpu tx/rx counters. */
1542 	dev->tstats = alloc_percpu(struct pcpu_tstats);
/*
 * netdev callbacks for gretap (Ethernet-over-GRE) devices.  Transmit,
 * uninit, MTU and stats handling are shared with plain GRE tunnels;
 * only init and the standard Ethernet address helpers differ.
 */
1549 static const struct net_device_ops ipgre_tap_netdev_ops = {
1550 	.ndo_init		= ipgre_tap_init,
1551 	.ndo_uninit		= ipgre_tunnel_uninit,
1552 	.ndo_start_xmit		= ipgre_tunnel_xmit,
1553 	.ndo_set_mac_address 	= eth_mac_addr,
1554 	.ndo_validate_addr	= eth_validate_addr,
1555 	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
1556 	.ndo_get_stats64	= ipgre_get_stats64,
/*
 * rtnl setup callback for gretap: install the tap netdev ops and
 * destructor, and pin the device to its network namespace.
 * NOTE(review): elided lines — the ether_setup(dev) call that gives
 * the device its Ethernet identity is not visible in this chunk.
 */
1559 static void ipgre_tap_setup(struct net_device *dev)
1564 	dev->netdev_ops		= &ipgre_tap_netdev_ops;
/* Frees netdev_priv + per-cpu stats when the last ref drops. */
1565 	dev->destructor		= ipgre_dev_free;
/* Tunnel devices cannot be moved between network namespaces. */
1568 	dev->features		|= NETIF_F_NETNS_LOCAL;
/*
 * rtnl newlink: create a GRE/gretap device from netlink attributes.
 * Parses parms, rejects duplicates, gives gretap a random MAC when
 * none was supplied, binds the device and registers it.
 * NOTE(review): elided lines — the err/mtu declarations, the -EEXIST
 * return on a duplicate, the error-unwind label after
 * register_netdevice() failure and the final return are not visible
 * in this chunk.
 */
1571 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1572 			 struct nlattr *data[])
1574 	struct ip_tunnel *nt;
1575 	struct net *net = dev_net(dev);
1576 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1580 	nt = netdev_priv(dev);
1581 	ipgre_netlink_parms(data, &nt->parms);
/* A tunnel with identical keys/endpoints already exists -> refuse. */
1583 	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
/* gretap with no IFLA_ADDRESS gets a random unicast MAC. */
1586 	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1587 		eth_hw_addr_random(dev);
1589 	mtu = ipgre_tunnel_bind_dev(dev);
1593 	/* Can use a lockless transmit, unless we generate output sequences */
1594 	if (!(nt->parms.o_flags & GRE_SEQ))
1595 		dev->features |= NETIF_F_LLTX;
1597 	err = register_netdevice(dev);
/* Insert into the per-netns tunnel hash so lookups can find it. */
1602 	ipgre_tunnel_link(ign, nt);
/*
 * rtnl changelink: modify an existing GRE/gretap device in place.
 * The fallback device is immutable; endpoint/key changes require
 * re-hashing the tunnel (unlink, update, relink); flag-type changes
 * (broadcast vs point-to-point) are refused for non-Ethernet links.
 * NOTE(review): elided lines — the error returns (-EINVAL/-EEXIST),
 * the `mtu` declaration, the duplicate check after
 * ipgre_tunnel_locate() and the final return are not visible in this
 * chunk.
 */
1608 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1609 			    struct nlattr *data[])
1611 	struct ip_tunnel *t, *nt;
1612 	struct net *net = dev_net(dev);
1613 	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1614 	struct ip_tunnel_parm p;
/* The per-netns fallback "gre0" device may not be reconfigured. */
1617 	if (dev == ign->fb_tunnel_dev)
1620 	nt = netdev_priv(dev);
1621 	ipgre_netlink_parms(data, &p);
/* Does another tunnel already own the requested parameters? */
1623 	t = ipgre_tunnel_locate(net, &p, 0);
1631 		if (dev->type != ARPHRD_ETHER) {
1632 			unsigned int nflags = 0;
/* Derive the link-layer flavour implied by the new remote address. */
1634 			if (ipv4_is_multicast(p.iph.daddr))
1635 				nflags = IFF_BROADCAST;
1636 			else if (p.iph.daddr)
1637 				nflags = IFF_POINTOPOINT;
/* Cannot flip between broadcast and point-to-point on a live dev. */
1639 			if ((dev->flags ^ nflags) &
1640 			    (IFF_POINTOPOINT | IFF_BROADCAST))
/* Endpoints/ikey feed the hash: take the tunnel out, update, re-add. */
1644 		ipgre_tunnel_unlink(ign, t);
1645 		t->parms.iph.saddr = p.iph.saddr;
1646 		t->parms.iph.daddr = p.iph.daddr;
1647 		t->parms.i_key = p.i_key;
1648 		if (dev->type != ARPHRD_ETHER) {
/* Plain GRE uses the IPv4 endpoints as dev/broadcast "hw" addresses. */
1649 			memcpy(dev->dev_addr, &p.iph.saddr, 4);
1650 			memcpy(dev->broadcast, &p.iph.daddr, 4);
1652 		ipgre_tunnel_link(ign, t);
1653 		netdev_state_change(dev);
/* Non-hashed parameters can be updated without relinking. */
1656 	t->parms.o_key = p.o_key;
1657 	t->parms.iph.ttl = p.iph.ttl;
1658 	t->parms.iph.tos = p.iph.tos;
1659 	t->parms.iph.frag_off = p.iph.frag_off;
/* A new underlying link changes headroom/MTU: rebind the device. */
1661 	if (t->parms.link != p.link) {
1662 		t->parms.link = p.link;
1663 		mtu = ipgre_tunnel_bind_dev(dev);
1666 		netdev_state_change(dev);
/*
 * rtnl .get_size: upper bound on the netlink payload emitted by
 * ipgre_fill_info() — one nla_total_size() term per IFLA_GRE_*
 * attribute.
 * NOTE(review): the nla_total_size() expressions that pair with each
 * attribute comment below are elided from this chunk, as are the
 * IFLA_GRE_LINK/IKEY/OKEY/TTL/TOS entries.
 */
1672 static size_t ipgre_get_size(const struct net_device *dev)
1677 		/* IFLA_GRE_IFLAGS */
1679 		/* IFLA_GRE_OFLAGS */
1685 		/* IFLA_GRE_LOCAL */
1687 		/* IFLA_GRE_REMOTE */
1693 		/* IFLA_GRE_PMTUDISC */
/*
 * rtnl .fill_info: dump the tunnel's current configuration as
 * IFLA_GRE_* attributes.  PMTUDISC is reported as a boolean derived
 * from the DF bit in the template outer header.
 * NOTE(review): elided lines — the `return 0` success path and the
 * nla_put_failure label returning -EMSGSIZE are not visible here.
 */
1698 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1700 	struct ip_tunnel *t = netdev_priv(dev);
1701 	struct ip_tunnel_parm *p = &t->parms;
/* Short-circuit OR: bail to the failure label on the first full skb. */
1703 	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1704 	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1705 	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1706 	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1707 	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1708 	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1709 	    nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1710 	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1711 	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
/* DF set in the template header <=> PMTU discovery is enabled. */
1712 	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1713 		       !!(p->iph.frag_off & htons(IP_DF))))
1714 		goto nla_put_failure;
/*
 * Netlink attribute policy for IFLA_GRE_*.  The address attributes are
 * validated by length (4 bytes, matching the iphdr fields) rather than
 * by integer type, since they carry raw big-endian IPv4 addresses.
 */
1721 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1722 	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
1723 	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
1724 	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
1725 	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
1726 	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
1727 	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1728 	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1729 	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
1730 	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
1731 	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
/*
 * rtnl_link_ops for plain "gre" (layer-3) tunnels.
 * NOTE(review): the .kind string initialiser is elided from this chunk
 * — presumably "gre", matching MODULE_ALIAS_RTNL_LINK below; confirm.
 */
1734 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1736 	.maxtype	= IFLA_GRE_MAX,
1737 	.policy		= ipgre_policy,
1738 	.priv_size	= sizeof(struct ip_tunnel),
1739 	.setup		= ipgre_tunnel_setup,
1740 	.validate	= ipgre_tunnel_validate,
1741 	.newlink	= ipgre_newlink,
1742 	.changelink	= ipgre_changelink,
1743 	.get_size	= ipgre_get_size,
1744 	.fill_info	= ipgre_fill_info,
/*
 * rtnl_link_ops for "gretap" (Ethernet-over-GRE) devices.  Shares the
 * policy, newlink/changelink and dump paths with plain GRE; only setup
 * and validate differ (Ethernet identity and MAC checks).
 * NOTE(review): the .kind string initialiser is elided from this chunk
 * — presumably "gretap"; confirm against the full file.
 */
1747 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1749 	.maxtype	= IFLA_GRE_MAX,
1750 	.policy		= ipgre_policy,
1751 	.priv_size	= sizeof(struct ip_tunnel),
1752 	.setup		= ipgre_tap_setup,
1753 	.validate	= ipgre_tap_validate,
1754 	.newlink	= ipgre_newlink,
1755 	.changelink	= ipgre_changelink,
1756 	.get_size	= ipgre_get_size,
1757 	.fill_info	= ipgre_fill_info,
1761  * And now the module's code and kernel interface.
/*
 * Module entry point.  Registration order: pernet device state first,
 * then the GRE protocol demux hook, then the two rtnl link types.
 * Failures unwind in reverse via the goto labels at the bottom.
 * NOTE(review): elided lines — the `int err` declaration, the early
 * return after register_pernet_device() failure, the `return 0`
 * success path and the label names preceding the unwind calls are not
 * visible in this chunk.
 */
1764 static int __init ipgre_init(void)
1768 	pr_info("GRE over IPv4 tunneling driver\n");
1770 	err = register_pernet_device(&ipgre_net_ops);
/* Hook GRE protocol 47 demux (Cisco-style GRE) to our receive path. */
1774 	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1776 		pr_info("%s: can't add protocol\n", __func__);
1777 		goto add_proto_failed;
1780 	err = rtnl_link_register(&ipgre_link_ops);
1782 		goto rtnl_link_failed;
1784 	err = rtnl_link_register(&ipgre_tap_ops);
1786 		goto tap_ops_failed;
/* Error unwind: undo each successful step in reverse order. */
1792 	rtnl_link_unregister(&ipgre_link_ops);
1794 	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1796 	unregister_pernet_device(&ipgre_net_ops);
/*
 * Module exit: tear down in the reverse order of ipgre_init().
 * gre_del_protocol() failure cannot abort unload, so it is only
 * logged.
 */
1800 static void __exit ipgre_fini(void)
1802 	rtnl_link_unregister(&ipgre_tap_ops);
1803 	rtnl_link_unregister(&ipgre_link_ops);
1804 	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1805 		pr_info("%s: can't remove protocol\n", __func__);
1806 	unregister_pernet_device(&ipgre_net_ops);
/*
 * Module plumbing: entry/exit hooks, licence, and the aliases that let
 * "ip link add ... type gre|gretap" and opening "gre0" autoload this
 * module.
 */
1809 module_init(ipgre_init);
1810 module_exit(ipgre_fini);
1811 MODULE_LICENSE("GPL");
1812 MODULE_ALIAS_RTNL_LINK("gre");
1813 MODULE_ALIAS_RTNL_LINK("gretap");
1814 MODULE_ALIAS_NETDEV("gre0");