]> rtime.felk.cvut.cz Git - can-eth-gw-linux.git/blob - net/ipv4/ip_gre.c
gre: fix sparse warning
[can-eth-gw-linux.git] / net / ipv4 / ip_gre.c
1 /*
2  *      Linux NET3:     GRE over IP protocol decoder.
3  *
4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5  *
6  *      This program is free software; you can redistribute it and/or
7  *      modify it under the terms of the GNU General Public License
8  *      as published by the Free Software Foundation; either version
9  *      2 of the License, or (at your option) any later version.
10  *
11  */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/capability.h>
16 #include <linux/module.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/slab.h>
20 #include <asm/uaccess.h>
21 #include <linux/skbuff.h>
22 #include <linux/netdevice.h>
23 #include <linux/in.h>
24 #include <linux/tcp.h>
25 #include <linux/udp.h>
26 #include <linux/if_arp.h>
27 #include <linux/mroute.h>
28 #include <linux/init.h>
29 #include <linux/in6.h>
30 #include <linux/inetdevice.h>
31 #include <linux/igmp.h>
32 #include <linux/netfilter_ipv4.h>
33 #include <linux/etherdevice.h>
34 #include <linux/if_ether.h>
35
36 #include <net/sock.h>
37 #include <net/ip.h>
38 #include <net/icmp.h>
39 #include <net/protocol.h>
40 #include <net/ipip.h>
41 #include <net/arp.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
45 #include <net/xfrm.h>
46 #include <net/net_namespace.h>
47 #include <net/netns/generic.h>
48 #include <net/rtnetlink.h>
49 #include <net/gre.h>
50
51 #if IS_ENABLED(CONFIG_IPV6)
52 #include <net/ipv6.h>
53 #include <net/ip6_fib.h>
54 #include <net/ip6_route.h>
55 #endif
56
57 /*
58    Problems & solutions
59    --------------------
60
61    1. The most important issue is detecting local dead loops.
62    They would cause complete host lockup in transmit, which
63    would be "resolved" by stack overflow or, if queueing is enabled,
64    with infinite looping in net_bh.
65
66    We cannot track such dead loops during route installation,
67    it is infeasible task. The most general solutions would be
68    to keep skb->encapsulation counter (sort of local ttl),
69    and silently drop packet when it expires. It is a good
70    solution, but it supposes maintaining new variable in ALL
71    skb, even if no tunneling is used.
72
73    Current solution: xmit_recursion breaks dead loops. This is a percpu
74    counter, since when we enter the first ndo_xmit(), cpu migration is
75    forbidden. We force an exit if this counter reaches RECURSION_LIMIT
76
77    2. Networking dead loops would not kill routers, but would really
78    kill network. IP hop limit plays role of "t->recursion" in this case,
79    if we copy it from packet being encapsulated to upper header.
80    It is very good solution, but it introduces two problems:
81
82    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
83      do not work over tunnels.
84    - traceroute does not work. I planned to relay ICMP from tunnel,
85      so that this problem would be solved and traceroute output
86      would even more informative. This idea appeared to be wrong:
87      only Linux complies to rfc1812 now (yes, guys, Linux is the only
88      true router now :-)), all routers (at least, in neighbourhood of mine)
89      return only 8 bytes of payload. It is the end.
90
91    Hence, if we want that OSPF worked or traceroute said something reasonable,
92    we should search for another solution.
93
94    One of them is to parse packet trying to detect inner encapsulation
95    made by our node. It is difficult or even impossible, especially,
96    taking into account fragmentation. To be short, ttl is no solution at all.
97
98    Current solution: The solution was UNEXPECTEDLY SIMPLE.
99    We force DF flag on tunnels with preconfigured hop limit,
100    that is ALL. :-) Well, it does not remove the problem completely,
101    but exponential growth of network traffic is changed to linear
102    (branches, that exceed pmtu are pruned) and tunnel mtu
103    rapidly degrades to value <68, where looping stops.
104    Yes, it is not good if there exists a router in the loop,
105    which does not force DF, even when encapsulating packets have DF set.
106    But it is not our problem! Nobody could accuse us, we made
107    all that we could make. Even if it is your gated who injected
108    fatal route to network, even if it were you who configured
109    fatal static route: you are innocent. :-)
110
111
112
113    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114    practically identical code. It would be good to glue them
115    together, but it is not very evident, how to make them modular.
116    sit is integral part of IPv6, ipip and gre are naturally modular.
117    We could extract common parts (hash table, ioctl etc)
118    to a separate module (ip_tunnel.c).
119
120    Alexey Kuznetsov.
121  */
122
/* When true, log (rate-limited) packets whose outer IP header carried a
 * non-ECT codepoint while the inner packet was ECT; see ipgre_rcv(). */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

/* Per-network-namespace GRE state, looked up via net_generic(). */
static int ipgre_net_id __read_mostly;
struct ipgre_net {
	/* tunnels[prio][hash]: four specificity classes, see comment below. */
	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;	/* the fallback (wildcard) device */
};
142
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched to configured keyless tunnels,
   will match the fallback tunnel.
 */

/* Fold an IPv4 address or GRE key down to a 4-bit bucket index. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l     tunnels[3]	/* (remote, local) */
#define tunnels_r       tunnels[2]	/* (remote, *)     */
#define tunnels_l       tunnels[1]	/* (*, local)      */
#define tunnels_wc      tunnels[0]	/* (*, *)          */
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/* Walk one RCU-protected hash chain; the caller must declare a local
 * 'struct ip_tunnel *t', which the macro assigns. */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
	u64	rx_packets;
	u64	rx_bytes;
	u64	tx_packets;
	u64	tx_bytes;
	struct u64_stats_sync	syncp;	/* consistent 64-bit reads on 32-bit SMP */
};
182
183 static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
184                                                    struct rtnl_link_stats64 *tot)
185 {
186         int i;
187
188         for_each_possible_cpu(i) {
189                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
190                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
191                 unsigned int start;
192
193                 do {
194                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
195                         rx_packets = tstats->rx_packets;
196                         tx_packets = tstats->tx_packets;
197                         rx_bytes = tstats->rx_bytes;
198                         tx_bytes = tstats->tx_bytes;
199                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
200
201                 tot->rx_packets += rx_packets;
202                 tot->tx_packets += tx_packets;
203                 tot->rx_bytes   += rx_bytes;
204                 tot->tx_bytes   += tx_bytes;
205         }
206
207         tot->multicast = dev->stats.multicast;
208         tot->rx_crc_errors = dev->stats.rx_crc_errors;
209         tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
210         tot->rx_length_errors = dev->stats.rx_length_errors;
211         tot->rx_frame_errors = dev->stats.rx_frame_errors;
212         tot->rx_errors = dev->stats.rx_errors;
213
214         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
215         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
216         tot->tx_dropped = dev->stats.tx_dropped;
217         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
218         tot->tx_errors = dev->stats.tx_errors;
219
220         return tot;
221 }
222
223 /* Does key in tunnel parameters match packet */
224 static bool ipgre_key_match(const struct ip_tunnel_parm *p,
225                             __be16 flags, __be32 key)
226 {
227         if (p->i_flags & GRE_KEY) {
228                 if (flags & GRE_KEY)
229                         return key == p->i_key;
230                 else
231                         return false;   /* key expected, none present */
232         } else
233                 return !(flags & GRE_KEY);
234 }
235
/* Given src, dst and key, find appropriate for input tunnel.
 *
 * The four hash tables are scanned from most to least specific:
 * (remote,local), (remote,*), (*,local), (*,*).  Within a table an entry
 * that also matches the ingress link and device type wins immediately;
 * otherwise the best-scoring candidate is remembered (lower is better:
 * bit 0 = link mismatch, bit 1 = dev-type mismatch).  If nothing matches,
 * the namespace's fallback device is returned when it is up.
 * Runs under RCU; 'dev' is the device the packet arrived on. */
static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
					     __be32 remote, __be32 local,
					     __be16 flags, __be32 key,
					     __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned int h0 = HASH(remote);
	unsigned int h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	/* ETH_P_TEB payloads belong to Ethernet (gretap) devices. */
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

	/* Pass 1: tunnels configured with both endpoints. */
	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ipgre_key_match(&t->parms, flags, key))
			continue;

		/* An ARPHRD_IPGRE device accepts either payload type. */
		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	/* Pass 2: remote endpoint only. */
	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ipgre_key_match(&t->parms, flags, key))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	/* Pass 3: local endpoint only (also matches our multicast groups,
	 * where 'local' is the packet's multicast destination). */
	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ipgre_key_match(&t->parms, flags, key))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	/* Pass 4: wildcard tunnels; here only an exact key compare is used. */
	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	/* Last resort: the per-namespace fallback device, if up. */
	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}
366
367 static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
368                 struct ip_tunnel_parm *parms)
369 {
370         __be32 remote = parms->iph.daddr;
371         __be32 local = parms->iph.saddr;
372         __be32 key = parms->i_key;
373         unsigned int h = HASH(key);
374         int prio = 0;
375
376         if (local)
377                 prio |= 1;
378         if (remote && !ipv4_is_multicast(remote)) {
379                 prio |= 2;
380                 h ^= HASH(remote);
381         }
382
383         return &ign->tunnels[prio][h];
384 }
385
/* Convenience wrapper: hash chain for an already-created tunnel. */
static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
		struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}
391
/* Publish tunnel 't' at the head of its hash chain (caller holds RTNL).
 * t->next is set before *tp, each via rcu_assign_pointer(), so concurrent
 * RCU readers always see a fully-linked chain. */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
399
/* Remove 't' from its hash chain (caller holds RTNL).  Concurrent RCU
 * readers may still be traversing 't'; freeing the device is deferred
 * elsewhere until after a grace period. */
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	/* Walk link pointers (not nodes) so we can splice 't' out in place. */
	for (tp = ipgre_bucket(ign, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
414
415 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
416                                            struct ip_tunnel_parm *parms,
417                                            int type)
418 {
419         __be32 remote = parms->iph.daddr;
420         __be32 local = parms->iph.saddr;
421         __be32 key = parms->i_key;
422         int link = parms->link;
423         struct ip_tunnel *t;
424         struct ip_tunnel __rcu **tp;
425         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
426
427         for (tp = __ipgre_bucket(ign, parms);
428              (t = rtnl_dereference(*tp)) != NULL;
429              tp = &t->next)
430                 if (local == t->parms.iph.saddr &&
431                     remote == t->parms.iph.daddr &&
432                     key == t->parms.i_key &&
433                     link == t->parms.link &&
434                     type == t->dev->type)
435                         break;
436
437         return t;
438 }
439
/* Find a tunnel matching 'parms'; if none exists and 'create' is set,
 * allocate, register and hash-link a new GRE netdevice.  Returns the
 * tunnel, or NULL on allocation/registration failure or when not found
 * and !create.  Caller holds RTNL. */
static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	/* "gre%d" lets register_netdevice() pick the first free unit. */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "gre%d");

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	/* The hash table holds a reference; ipgre_tunnel_uninit() drops it. */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
484
/* ndo_uninit: unhash the tunnel and drop the reference taken when it
 * was linked into the hash table. */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
493
494
/* ICMP error handler for outgoing GRE packets: skb->data points at the
 * inner IPv4 header quoted in the ICMP error, i.e. a packet WE sent.
 * 'info' carries type-specific data (e.g. next-hop MTU for FRAG_NEEDED).
 * Updates PMTU/redirect state and the tunnel's soft error counters. */
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	const struct iphdr *iph = (const struct iphdr *)skb->data;
	__be16	     *p = (__be16 *)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;	/* IP header + base GRE header */
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;
	__be32 key = 0;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	/* Key is the last 32-bit word of the (csum,key) option area. */
	if (flags & GRE_KEY)
		key = *(((__be32 *)p) + (grehlen / 4) - 1);

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;

	case ICMP_REDIRECT:
		break;
	}

	/* saddr/daddr are swapped: the quoted packet was sent by us, so its
	 * destination is the tunnel's remote endpoint.  p[1] = GRE proto. */
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags, key, p[1]);

	if (t == NULL)
		return;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_GRE, 0);
		return;
	}
	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
			      IPPROTO_GRE, 0);
		return;
	}
	/* No soft-state bookkeeping for NBMA or multicast tunnels. */
	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		return;

	/* ttl==0 means "inherit"; TIME_EXCEEDED is then expected (e.g.
	 * traceroute through the tunnel) and not an error of ours. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	/* Time-windowed error counter consumed by the transmit path. */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
}
595
596 static inline u8
597 ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
598 {
599         u8 inner = 0;
600         if (skb->protocol == htons(ETH_P_IP))
601                 inner = old_iph->tos;
602         else if (skb->protocol == htons(ETH_P_IPV6))
603                 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
604         return INET_ECN_encapsulate(tos, inner);
605 }
606
/* Receive handler for GRE-in-IPv4 (called via the gre demultiplexer).
 * Parses the GRE header, finds the matching tunnel, validates the
 * checksum/sequence options when configured, strips the encapsulation
 * and hands the inner packet back to the stack.  Always returns 0:
 * the skb is either delivered or freed here. */
static int ipgre_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;	/* base GRE header: flags + protocol */
	__be16 gre_proto;
	int    err;

	/* 16 = largest header we may read below: 4 base + csum + key + seq. */
	if (!pskb_may_pull(skb, 16))
		goto drop;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16 *)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32 *)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32 *)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	tunnel = ipgre_tunnel_lookup(skb->dev,
				     iph->saddr, iph->daddr, flags, key,
				     gre_proto);
	if (tunnel) {
		struct pcpu_tstats *tstats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		/* Strip the outer IP + GRE headers. */
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (rt_is_output_route(skb_rtable(skb)))
				goto drop;
			tunnel->dev->stats.multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* Drop on bad checksum, or when the tunnel requires a
		 * checksum the packet did not carry. */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			tunnel->dev->stats.rx_crc_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		/* Enforce in-order delivery when sequencing is configured. */
		if (tunnel->parms.i_flags&GRE_SEQ) {
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				tunnel->dev->stats.rx_fifo_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				tunnel->dev->stats.rx_length_errors++;
				tunnel->dev->stats.rx_errors++;
				goto drop;
			}

			/* Re-fetch iph: pskb_may_pull may have moved data. */
			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		__skb_tunnel_rx(skb, tunnel->dev);

		skb_reset_network_header(skb);
		/* Propagate ECN from the outer header into the inner one;
		 * err > 1 means the combination is invalid (RFC 6040). */
		err = IP_ECN_decapsulate(iph, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &iph->saddr, iph->tos);
			if (err > 1) {
				++tunnel->dev->stats.rx_frame_errors;
				++tunnel->dev->stats.rx_errors;
				goto drop;
			}
		}

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		gro_cells_receive(&tunnel->gro_cells, skb);
		return 0;
	}
	/* No tunnel claimed the packet: tell the sender. */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	kfree_skb(skb);
	return 0;
}
752
753 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
754 {
755         struct ip_tunnel *tunnel = netdev_priv(dev);
756         struct pcpu_tstats *tstats;
757         const struct iphdr  *old_iph = ip_hdr(skb);
758         const struct iphdr  *tiph;
759         struct flowi4 fl4;
760         u8     tos;
761         __be16 df;
762         struct rtable *rt;                      /* Route to the other host */
763         struct net_device *tdev;                /* Device to other host */
764         struct iphdr  *iph;                     /* Our new IP header */
765         unsigned int max_headroom;              /* The extra header space needed */
766         int    gre_hlen;
767         __be32 dst;
768         int    mtu;
769
770         if (skb->ip_summed == CHECKSUM_PARTIAL &&
771             skb_checksum_help(skb))
772                 goto tx_error;
773
774         if (dev->type == ARPHRD_ETHER)
775                 IPCB(skb)->flags = 0;
776
777         if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
778                 gre_hlen = 0;
779                 tiph = (const struct iphdr *)skb->data;
780         } else {
781                 gre_hlen = tunnel->hlen;
782                 tiph = &tunnel->parms.iph;
783         }
784
785         if ((dst = tiph->daddr) == 0) {
786                 /* NBMA tunnel */
787
788                 if (skb_dst(skb) == NULL) {
789                         dev->stats.tx_fifo_errors++;
790                         goto tx_error;
791                 }
792
793                 if (skb->protocol == htons(ETH_P_IP)) {
794                         rt = skb_rtable(skb);
795                         dst = rt_nexthop(rt, old_iph->daddr);
796                 }
797 #if IS_ENABLED(CONFIG_IPV6)
798                 else if (skb->protocol == htons(ETH_P_IPV6)) {
799                         const struct in6_addr *addr6;
800                         struct neighbour *neigh;
801                         bool do_tx_error_icmp;
802                         int addr_type;
803
804                         neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
805                         if (neigh == NULL)
806                                 goto tx_error;
807
808                         addr6 = (const struct in6_addr *)&neigh->primary_key;
809                         addr_type = ipv6_addr_type(addr6);
810
811                         if (addr_type == IPV6_ADDR_ANY) {
812                                 addr6 = &ipv6_hdr(skb)->daddr;
813                                 addr_type = ipv6_addr_type(addr6);
814                         }
815
816                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
817                                 do_tx_error_icmp = true;
818                         else {
819                                 do_tx_error_icmp = false;
820                                 dst = addr6->s6_addr32[3];
821                         }
822                         neigh_release(neigh);
823                         if (do_tx_error_icmp)
824                                 goto tx_error_icmp;
825                 }
826 #endif
827                 else
828                         goto tx_error;
829         }
830
831         tos = tiph->tos;
832         if (tos == 1) {
833                 tos = 0;
834                 if (skb->protocol == htons(ETH_P_IP))
835                         tos = old_iph->tos;
836                 else if (skb->protocol == htons(ETH_P_IPV6))
837                         tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
838         }
839
840         rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
841                                  tunnel->parms.o_key, RT_TOS(tos),
842                                  tunnel->parms.link);
843         if (IS_ERR(rt)) {
844                 dev->stats.tx_carrier_errors++;
845                 goto tx_error;
846         }
847         tdev = rt->dst.dev;
848
849         if (tdev == dev) {
850                 ip_rt_put(rt);
851                 dev->stats.collisions++;
852                 goto tx_error;
853         }
854
855         df = tiph->frag_off;
856         if (df)
857                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
858         else
859                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
860
861         if (skb_dst(skb))
862                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
863
864         if (skb->protocol == htons(ETH_P_IP)) {
865                 df |= (old_iph->frag_off&htons(IP_DF));
866
867                 if ((old_iph->frag_off&htons(IP_DF)) &&
868                     mtu < ntohs(old_iph->tot_len)) {
869                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
870                         ip_rt_put(rt);
871                         goto tx_error;
872                 }
873         }
874 #if IS_ENABLED(CONFIG_IPV6)
875         else if (skb->protocol == htons(ETH_P_IPV6)) {
876                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
877
878                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
879                         if ((tunnel->parms.iph.daddr &&
880                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
881                             rt6->rt6i_dst.plen == 128) {
882                                 rt6->rt6i_flags |= RTF_MODIFIED;
883                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
884                         }
885                 }
886
887                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
888                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
889                         ip_rt_put(rt);
890                         goto tx_error;
891                 }
892         }
893 #endif
894
895         if (tunnel->err_count > 0) {
896                 if (time_before(jiffies,
897                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
898                         tunnel->err_count--;
899
900                         dst_link_failure(skb);
901                 } else
902                         tunnel->err_count = 0;
903         }
904
905         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
906
907         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
908             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
909                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
910                 if (max_headroom > dev->needed_headroom)
911                         dev->needed_headroom = max_headroom;
912                 if (!new_skb) {
913                         ip_rt_put(rt);
914                         dev->stats.tx_dropped++;
915                         dev_kfree_skb(skb);
916                         return NETDEV_TX_OK;
917                 }
918                 if (skb->sk)
919                         skb_set_owner_w(new_skb, skb->sk);
920                 dev_kfree_skb(skb);
921                 skb = new_skb;
922                 old_iph = ip_hdr(skb);
923         }
924
925         skb_reset_transport_header(skb);
926         skb_push(skb, gre_hlen);
927         skb_reset_network_header(skb);
928         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
929         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
930                               IPSKB_REROUTED);
931         skb_dst_drop(skb);
932         skb_dst_set(skb, &rt->dst);
933
934         /*
935          *      Push down and install the IPIP header.
936          */
937
938         iph                     =       ip_hdr(skb);
939         iph->version            =       4;
940         iph->ihl                =       sizeof(struct iphdr) >> 2;
941         iph->frag_off           =       df;
942         iph->protocol           =       IPPROTO_GRE;
943         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
944         iph->daddr              =       fl4.daddr;
945         iph->saddr              =       fl4.saddr;
946
947         if ((iph->ttl = tiph->ttl) == 0) {
948                 if (skb->protocol == htons(ETH_P_IP))
949                         iph->ttl = old_iph->ttl;
950 #if IS_ENABLED(CONFIG_IPV6)
951                 else if (skb->protocol == htons(ETH_P_IPV6))
952                         iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
953 #endif
954                 else
955                         iph->ttl = ip4_dst_hoplimit(&rt->dst);
956         }
957
958         ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
959         ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
960                                    htons(ETH_P_TEB) : skb->protocol;
961
962         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
963                 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
964
965                 if (tunnel->parms.o_flags&GRE_SEQ) {
966                         ++tunnel->o_seqno;
967                         *ptr = htonl(tunnel->o_seqno);
968                         ptr--;
969                 }
970                 if (tunnel->parms.o_flags&GRE_KEY) {
971                         *ptr = tunnel->parms.o_key;
972                         ptr--;
973                 }
974                 if (tunnel->parms.o_flags&GRE_CSUM) {
975                         *ptr = 0;
976                         *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
977                 }
978         }
979
980         nf_reset(skb);
981         tstats = this_cpu_ptr(dev->tstats);
982         __IPTUNNEL_XMIT(tstats, &dev->stats);
983         return NETDEV_TX_OK;
984
985 #if IS_ENABLED(CONFIG_IPV6)
986 tx_error_icmp:
987         dst_link_failure(skb);
988 #endif
989 tx_error:
990         dev->stats.tx_errors++;
991         dev_kfree_skb(skb);
992         return NETDEV_TX_OK;
993 }
994
/*
 * Guess the output device for this tunnel so that a reasonable mtu and
 * needed_headroom can be chosen, and precompute the per-packet GRE
 * encapsulation length (tunnel->hlen).  Returns the mtu the tunnel
 * device should use.
 */
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;	/* outer IP + base GRE header */

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		/* Route towards the remote endpoint to discover the real
		 * underlying device. */
		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 iph->daddr, iph->saddr,
					 tunnel->parms.o_key,
					 RT_TOS(iph->tos),
					 tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route: fall back to the explicitly configured link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	/* 68 is the minimum IPv4 MTU (RFC 791). */
	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}
1055
/*
 * Legacy ioctl configuration interface (SIOCGETTUNNEL / SIOCADDTUNNEL /
 * SIOCCHGTUNNEL / SIOCDELTUNNEL) exchanging struct ip_tunnel_parm with
 * userspace.  Returns 0 or a negative errno.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			/* On the fallback device, look up the tunnel the
			 * caller actually asked about. */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/* Accept only plain IPv4-in-GRE parameters; reject IP
		 * options, unknown frag bits and GRE version/routing bits. */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Keys only matter when the corresponding GRE_KEY flag is set. */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The broadcast/point-to-point nature of the
				 * device cannot be changed on the fly. */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Re-hash the tunnel under its new
				 * addresses/keys. */
				ipgre_tunnel_unlink(ign, t);
				synchronize_net();
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					/* Binding changed: recompute mtu. */
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* The fallback tunnel itself cannot be deleted. */
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1186
1187 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1188 {
1189         struct ip_tunnel *tunnel = netdev_priv(dev);
1190         if (new_mtu < 68 ||
1191             new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1192                 return -EINVAL;
1193         dev->mtu = new_mtu;
1194         return 0;
1195 }
1196
1197 /* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
1199    over the Internet, provided multicast routing is tuned.
1200
1201
   I have no idea whether this bicycle was invented before me,
1203    so that I had to set ARPHRD_IPGRE to a random value.
1204    I have an impression, that Cisco could make something similar,
1205    but this feature is apparently missing in IOS<=11.2(8).
1206
1207    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1208    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1209
1210    ping -t 255 224.66.66.66
1211
1212    If nobody answers, mbone does not work.
1213
1214    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1215    ip addr add 10.66.66.<somewhat>/24 dev Universe
1216    ifconfig Universe up
1217    ifconfig Universe add fe80::<Your_real_addr>/10
1218    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1219    ftp 10.66.66.66
1220    ...
1221    ftp fec0:6666:6666::193.233.7.65
1222    ...
1223
1224  */
1225
1226 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1227                         unsigned short type,
1228                         const void *daddr, const void *saddr, unsigned int len)
1229 {
1230         struct ip_tunnel *t = netdev_priv(dev);
1231         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1232         __be16 *p = (__be16 *)(iph+1);
1233
1234         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1235         p[0]            = t->parms.o_flags;
1236         p[1]            = htons(type);
1237
1238         /*
1239          *      Set the source hardware address.
1240          */
1241
1242         if (saddr)
1243                 memcpy(&iph->saddr, saddr, 4);
1244         if (daddr)
1245                 memcpy(&iph->daddr, daddr, 4);
1246         if (iph->daddr)
1247                 return t->hlen;
1248
1249         return -t->hlen;
1250 }
1251
1252 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1253 {
1254         const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
1255         memcpy(haddr, &iph->saddr, 4);
1256         return 4;
1257 }
1258
/* Link-layer header operations for GRE devices that carry an address
 * (point-to-multipoint / broadcast modes). */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
1263
1264 #ifdef CONFIG_NET_IPGRE_BROADCAST
/*
 * ndo_open for broadcast-capable GRE tunnels: when the remote endpoint
 * is an IPv4 multicast address, join that group on the underlying
 * device selected by routing.  Returns 0 or -EADDRNOTAVAIL when no
 * usable route/device exists.
 */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_gre(dev_net(dev), &fl4,
					 t->parms.iph.daddr,
					 t->parms.iph.saddr,
					 t->parms.o_key,
					 RT_TOS(t->parms.iph.tos),
					 t->parms.link);
		if (IS_ERR(rt))
			return -EADDRNOTAVAIL;
		/* From here on, "dev" is the underlying output device. */
		dev = rt->dst.dev;
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		/* Remember which link we joined on so ipgre_close() can
		 * leave the group again. */
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
1290
1291 static int ipgre_close(struct net_device *dev)
1292 {
1293         struct ip_tunnel *t = netdev_priv(dev);
1294
1295         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1296                 struct in_device *in_dev;
1297                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1298                 if (in_dev)
1299                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1300         }
1301         return 0;
1302 }
1303
1304 #endif
1305
/* Device operations for ARPHRD_IPGRE (layer-3) tunnel devices. */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};
1318
/*
 * Device destructor: release the GRO cells and per-cpu stats before
 * freeing the netdev itself (reverse of the allocations done at init).
 */
static void ipgre_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
1327
/* Offload feature set advertised by GRE devices. */
#define GRE_FEATURES (NETIF_F_SG |		\
		      NETIF_F_FRAGLIST |	\
		      NETIF_F_HIGHDMA |		\
		      NETIF_F_HW_CSUM)
1332
/*
 * netdev setup callback for ARPHRD_IPGRE tunnel devices.  The initial
 * mtu/needed_headroom assume the minimal outer IP + 4-byte GRE header;
 * ipgre_tunnel_bind_dev() refines them once the underlying link is known.
 */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are IPv4 endpoints */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
}
1350
/*
 * ndo_init for layer-3 GRE devices: mirror the tunnel endpoints into
 * dev_addr/broadcast, install header_ops where appropriate, and allocate
 * per-cpu stats plus GRO cells.  Returns 0 or a negative errno.
 */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int err;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Multicast "broadcast LAN" mode needs a local
			 * address to source the group join from. */
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		/* NBMA mode: destination is supplied per packet. */
		dev->header_ops = &ipgre_header_ops;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		/* Undo the stats allocation on failure. */
		free_percpu(dev->tstats);
		return err;
	}

	return 0;
}
1390
1391 static void ipgre_fb_tunnel_init(struct net_device *dev)
1392 {
1393         struct ip_tunnel *tunnel = netdev_priv(dev);
1394         struct iphdr *iph = &tunnel->parms.iph;
1395
1396         tunnel->dev = dev;
1397         strcpy(tunnel->parms.name, dev->name);
1398
1399         iph->version            = 4;
1400         iph->protocol           = IPPROTO_GRE;
1401         iph->ihl                = 5;
1402         tunnel->hlen            = sizeof(struct iphdr) + 4;
1403
1404         dev_hold(dev);
1405 }
1406
1407
/* GRE demux hooks: receive and ICMP error handlers for this protocol. */
static const struct gre_protocol ipgre_protocol = {
	.handler     = ipgre_rcv,
	.err_handler = ipgre_err,
};
1412
1413 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1414 {
1415         int prio;
1416
1417         for (prio = 0; prio < 4; prio++) {
1418                 int h;
1419                 for (h = 0; h < HASH_SIZE; h++) {
1420                         struct ip_tunnel *t;
1421
1422                         t = rtnl_dereference(ign->tunnels[prio][h]);
1423
1424                         while (t != NULL) {
1425                                 unregister_netdevice_queue(t->dev, head);
1426                                 t = rtnl_dereference(t->next);
1427                         }
1428                 }
1429         }
1430 }
1431
/*
 * Per-namespace init: create and register the fallback "gre0" device
 * and hash it into the wildcard tunnel slot.  Returns 0 or -errno.
 */
static int __net_init ipgre_init_net(struct net *net)
{
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int err;

	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
					   ipgre_tunnel_setup);
	if (!ign->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ign->fb_tunnel_dev, net);

	ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

	if ((err = register_netdev(ign->fb_tunnel_dev)))
		goto err_reg_dev;

	rcu_assign_pointer(ign->tunnels_wc[0],
			   netdev_priv(ign->fb_tunnel_dev));
	return 0;

err_reg_dev:
	/* Registration failed: free the device ourselves. */
	ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
	return err;
}
1460
1461 static void __net_exit ipgre_exit_net(struct net *net)
1462 {
1463         struct ipgre_net *ign;
1464         LIST_HEAD(list);
1465
1466         ign = net_generic(net, ipgre_net_id);
1467         rtnl_lock();
1468         ipgre_destroy_tunnels(ign, &list);
1469         unregister_netdevice_many(&list);
1470         rtnl_unlock();
1471 }
1472
/* Pernet lifecycle hooks; .size reserves struct ipgre_net per netns. */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};
1479
1480 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1481 {
1482         __be16 flags;
1483
1484         if (!data)
1485                 return 0;
1486
1487         flags = 0;
1488         if (data[IFLA_GRE_IFLAGS])
1489                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1490         if (data[IFLA_GRE_OFLAGS])
1491                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1492         if (flags & (GRE_VERSION|GRE_ROUTING))
1493                 return -EINVAL;
1494
1495         return 0;
1496 }
1497
1498 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1499 {
1500         __be32 daddr;
1501
1502         if (tb[IFLA_ADDRESS]) {
1503                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1504                         return -EINVAL;
1505                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1506                         return -EADDRNOTAVAIL;
1507         }
1508
1509         if (!data)
1510                 goto out;
1511
1512         if (data[IFLA_GRE_REMOTE]) {
1513                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1514                 if (!daddr)
1515                         return -EINVAL;
1516         }
1517
1518 out:
1519         return ipgre_tunnel_validate(tb, data);
1520 }
1521
/*
 * Translate IFLA_GRE_* netlink attributes into struct ip_tunnel_parm.
 * Missing attributes keep the zeroed defaults; PMTU discovery defaults
 * to on (IP_DF set) unless explicitly disabled.
 */
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
1562
1563 static int ipgre_tap_init(struct net_device *dev)
1564 {
1565         struct ip_tunnel *tunnel;
1566
1567         tunnel = netdev_priv(dev);
1568
1569         tunnel->dev = dev;
1570         strcpy(tunnel->parms.name, dev->name);
1571
1572         ipgre_tunnel_bind_dev(dev);
1573
1574         dev->tstats = alloc_percpu(struct pcpu_tstats);
1575         if (!dev->tstats)
1576                 return -ENOMEM;
1577
1578         return 0;
1579 }
1580
/* Device operations for gretap (ARPHRD_ETHER) devices. */
static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
	.ndo_get_stats64	= ipgre_get_stats64,
};
1590
/*
 * netdev setup callback for gretap devices: start from Ethernet
 * defaults, then install the GRE-specific ops and destructor.
 */
static void ipgre_tap_setup(struct net_device *dev)
{

	ether_setup(dev);

	dev->netdev_ops		= &ipgre_tap_netdev_ops;
	dev->destructor		= ipgre_dev_free;

	dev->iflink		= 0;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
1602
/*
 * rtnl newlink handler: parse netlink parameters, reject duplicates,
 * bind to the underlying link to choose an mtu, then register the
 * device and hash it into the tunnel table.  Returns 0 or -errno.
 */
static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ipgre_tunnel_bind_dev(dev);
	/* An explicit IFLA_MTU from userspace takes precedence. */
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	/* Can use a lockless transmit, unless we generate output sequences */
	if (!(nt->parms.o_flags & GRE_SEQ))
		dev->features |= NETIF_F_LLTX;

	err = register_netdevice(dev);
	if (err)
		goto out;

	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
1639
/*
 * rtnl changelink handler: apply new netlink parameters to an existing
 * tunnel, re-hashing it when its addresses/keys change.  The fallback
 * device cannot be reconfigured this way.  Returns 0 or -errno.
 */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		/* Another tunnel already has these parameters. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p.iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p.iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* The broadcast/point-to-point nature of the
			 * device cannot change on the fly. */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}

		/* Re-hash the tunnel under its new addresses/key. */
		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		/* Underlying link changed: rebind and recompute mtu
		 * (unless userspace supplied an explicit IFLA_MTU). */
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
1703
1704 static size_t ipgre_get_size(const struct net_device *dev)
1705 {
1706         return
1707                 /* IFLA_GRE_LINK */
1708                 nla_total_size(4) +
1709                 /* IFLA_GRE_IFLAGS */
1710                 nla_total_size(2) +
1711                 /* IFLA_GRE_OFLAGS */
1712                 nla_total_size(2) +
1713                 /* IFLA_GRE_IKEY */
1714                 nla_total_size(4) +
1715                 /* IFLA_GRE_OKEY */
1716                 nla_total_size(4) +
1717                 /* IFLA_GRE_LOCAL */
1718                 nla_total_size(4) +
1719                 /* IFLA_GRE_REMOTE */
1720                 nla_total_size(4) +
1721                 /* IFLA_GRE_TTL */
1722                 nla_total_size(1) +
1723                 /* IFLA_GRE_TOS */
1724                 nla_total_size(1) +
1725                 /* IFLA_GRE_PMTUDISC */
1726                 nla_total_size(1) +
1727                 0;
1728 }
1729
1730 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1731 {
1732         struct ip_tunnel *t = netdev_priv(dev);
1733         struct ip_tunnel_parm *p = &t->parms;
1734
1735         if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1736             nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1737             nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1738             nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1739             nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1740             nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1741             nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1742             nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1743             nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1744             nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1745                        !!(p->iph.frag_off & htons(IP_DF))))
1746                 goto nla_put_failure;
1747         return 0;
1748
1749 nla_put_failure:
1750         return -EMSGSIZE;
1751 }
1752
/* Netlink attribute validation policy for IFLA_GRE_* attributes.
 * LOCAL/REMOTE are raw IPv4 addresses validated by length rather than
 * by an NLA_* integer type. */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
};
1765
/* rtnl_link_ops for plain (layer-3, ARPHRD_IPGRE) "gre" devices. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
        .kind           = "gre",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tunnel_setup,
        .validate       = ipgre_tunnel_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
};
1778
/* rtnl_link_ops for Ethernet-over-GRE "gretap" devices; shares the policy
 * and netlink handlers with "gre" but uses the tap setup/validate hooks. */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
        .kind           = "gretap",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tap_setup,
        .validate       = ipgre_tap_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
};
1791
1792 /*
1793  *      And now the modules code and kernel interface.
1794  */
1795
1796 static int __init ipgre_init(void)
1797 {
1798         int err;
1799
1800         pr_info("GRE over IPv4 tunneling driver\n");
1801
1802         err = register_pernet_device(&ipgre_net_ops);
1803         if (err < 0)
1804                 return err;
1805
1806         err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1807         if (err < 0) {
1808                 pr_info("%s: can't add protocol\n", __func__);
1809                 goto add_proto_failed;
1810         }
1811
1812         err = rtnl_link_register(&ipgre_link_ops);
1813         if (err < 0)
1814                 goto rtnl_link_failed;
1815
1816         err = rtnl_link_register(&ipgre_tap_ops);
1817         if (err < 0)
1818                 goto tap_ops_failed;
1819
1820 out:
1821         return err;
1822
1823 tap_ops_failed:
1824         rtnl_link_unregister(&ipgre_link_ops);
1825 rtnl_link_failed:
1826         gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1827 add_proto_failed:
1828         unregister_pernet_device(&ipgre_net_ops);
1829         goto out;
1830 }
1831
1832 static void __exit ipgre_fini(void)
1833 {
1834         rtnl_link_unregister(&ipgre_tap_ops);
1835         rtnl_link_unregister(&ipgre_link_ops);
1836         if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1837                 pr_info("%s: can't remove protocol\n", __func__);
1838         unregister_pernet_device(&ipgre_net_ops);
1839 }
1840
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Allow auto-loading when userspace creates "gre"/"gretap" rtnl links
 * or references the gre0 fallback device by name. */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_NETDEV("gre0");