Merge tag 'tty-4.15-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
[sfrench/cifs-2.6.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/init.h>
34 #include <linux/in6.h>
35 #include <linux/inetdevice.h>
36 #include <linux/igmp.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/etherdevice.h>
39 #include <linux/if_ether.h>
40 #include <linux/if_vlan.h>
41 #include <linux/rculist.h>
42 #include <linux/err.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57 #include <net/udp.h>
58 #include <net/dst_metadata.h>
59
60 #if IS_ENABLED(CONFIG_IPV6)
61 #include <net/ipv6.h>
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
64 #endif
65
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67 {
68         return hash_32((__force u32)key ^ (__force u32)remote,
69                          IP_TNL_HASH_BITS);
70 }
71
72 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
73                                 __be16 flags, __be32 key)
74 {
75         if (p->i_flags & TUNNEL_KEY) {
76                 if (flags & TUNNEL_KEY)
77                         return key == p->i_key;
78                 else
79                         /* key expected, none present */
80                         return false;
81         } else
82                 return !(flags & TUNNEL_KEY);
83 }
84
85 /* Fallback tunnel: no source, no destination, no key, no options
86
87    Tunnel hash table:
88    We require exact key match i.e. if a key is present in packet
89    it will match only tunnel with the same key; if it is not present,
90    it will match only keyless tunnel.
91
92    All keysless packets, if not matched configured keyless tunnels
93    will match fallback tunnel.
94    Given src, dst and key, find appropriate for input tunnel.
95 */
96 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
97                                    int link, __be16 flags,
98                                    __be32 remote, __be32 local,
99                                    __be32 key)
100 {
101         unsigned int hash;
102         struct ip_tunnel *t, *cand = NULL;
103         struct hlist_head *head;
104
105         hash = ip_tunnel_hash(key, remote);
106         head = &itn->tunnels[hash];
107
108         hlist_for_each_entry_rcu(t, head, hash_node) {
109                 if (local != t->parms.iph.saddr ||
110                     remote != t->parms.iph.daddr ||
111                     !(t->dev->flags & IFF_UP))
112                         continue;
113
114                 if (!ip_tunnel_key_match(&t->parms, flags, key))
115                         continue;
116
117                 if (t->parms.link == link)
118                         return t;
119                 else
120                         cand = t;
121         }
122
123         hlist_for_each_entry_rcu(t, head, hash_node) {
124                 if (remote != t->parms.iph.daddr ||
125                     t->parms.iph.saddr != 0 ||
126                     !(t->dev->flags & IFF_UP))
127                         continue;
128
129                 if (!ip_tunnel_key_match(&t->parms, flags, key))
130                         continue;
131
132                 if (t->parms.link == link)
133                         return t;
134                 else if (!cand)
135                         cand = t;
136         }
137
138         hash = ip_tunnel_hash(key, 0);
139         head = &itn->tunnels[hash];
140
141         hlist_for_each_entry_rcu(t, head, hash_node) {
142                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
143                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
144                         continue;
145
146                 if (!(t->dev->flags & IFF_UP))
147                         continue;
148
149                 if (!ip_tunnel_key_match(&t->parms, flags, key))
150                         continue;
151
152                 if (t->parms.link == link)
153                         return t;
154                 else if (!cand)
155                         cand = t;
156         }
157
158         if (flags & TUNNEL_NO_KEY)
159                 goto skip_key_lookup;
160
161         hlist_for_each_entry_rcu(t, head, hash_node) {
162                 if (t->parms.i_key != key ||
163                     t->parms.iph.saddr != 0 ||
164                     t->parms.iph.daddr != 0 ||
165                     !(t->dev->flags & IFF_UP))
166                         continue;
167
168                 if (t->parms.link == link)
169                         return t;
170                 else if (!cand)
171                         cand = t;
172         }
173
174 skip_key_lookup:
175         if (cand)
176                 return cand;
177
178         t = rcu_dereference(itn->collect_md_tun);
179         if (t && t->dev->flags & IFF_UP)
180                 return t;
181
182         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
183                 return netdev_priv(itn->fb_tunnel_dev);
184
185         return NULL;
186 }
187 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
188
189 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
190                                     struct ip_tunnel_parm *parms)
191 {
192         unsigned int h;
193         __be32 remote;
194         __be32 i_key = parms->i_key;
195
196         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
197                 remote = parms->iph.daddr;
198         else
199                 remote = 0;
200
201         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
202                 i_key = 0;
203
204         h = ip_tunnel_hash(i_key, remote);
205         return &itn->tunnels[h];
206 }
207
208 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
209 {
210         struct hlist_head *head = ip_bucket(itn, &t->parms);
211
212         if (t->collect_md)
213                 rcu_assign_pointer(itn->collect_md_tun, t);
214         hlist_add_head_rcu(&t->hash_node, head);
215 }
216
217 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
218 {
219         if (t->collect_md)
220                 rcu_assign_pointer(itn->collect_md_tun, NULL);
221         hlist_del_init_rcu(&t->hash_node);
222 }
223
224 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
225                                         struct ip_tunnel_parm *parms,
226                                         int type)
227 {
228         __be32 remote = parms->iph.daddr;
229         __be32 local = parms->iph.saddr;
230         __be32 key = parms->i_key;
231         __be16 flags = parms->i_flags;
232         int link = parms->link;
233         struct ip_tunnel *t = NULL;
234         struct hlist_head *head = ip_bucket(itn, parms);
235
236         hlist_for_each_entry_rcu(t, head, hash_node) {
237                 if (local == t->parms.iph.saddr &&
238                     remote == t->parms.iph.daddr &&
239                     link == t->parms.link &&
240                     type == t->dev->type &&
241                     ip_tunnel_key_match(&t->parms, flags, key))
242                         break;
243         }
244         return t;
245 }
246
247 static struct net_device *__ip_tunnel_create(struct net *net,
248                                              const struct rtnl_link_ops *ops,
249                                              struct ip_tunnel_parm *parms)
250 {
251         int err;
252         struct ip_tunnel *tunnel;
253         struct net_device *dev;
254         char name[IFNAMSIZ];
255
256         if (parms->name[0])
257                 strlcpy(name, parms->name, IFNAMSIZ);
258         else {
259                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
260                         err = -E2BIG;
261                         goto failed;
262                 }
263                 strlcpy(name, ops->kind, IFNAMSIZ);
264                 strncat(name, "%d", 2);
265         }
266
267         ASSERT_RTNL();
268         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
269         if (!dev) {
270                 err = -ENOMEM;
271                 goto failed;
272         }
273         dev_net_set(dev, net);
274
275         dev->rtnl_link_ops = ops;
276
277         tunnel = netdev_priv(dev);
278         tunnel->parms = *parms;
279         tunnel->net = net;
280
281         err = register_netdevice(dev);
282         if (err)
283                 goto failed_free;
284
285         return dev;
286
287 failed_free:
288         free_netdev(dev);
289 failed:
290         return ERR_PTR(err);
291 }
292
293 static inline void init_tunnel_flow(struct flowi4 *fl4,
294                                     int proto,
295                                     __be32 daddr, __be32 saddr,
296                                     __be32 key, __u8 tos, int oif,
297                                     __u32 mark)
298 {
299         memset(fl4, 0, sizeof(*fl4));
300         fl4->flowi4_oif = oif;
301         fl4->daddr = daddr;
302         fl4->saddr = saddr;
303         fl4->flowi4_tos = tos;
304         fl4->flowi4_proto = proto;
305         fl4->fl4_gre_key = key;
306         fl4->flowi4_mark = mark;
307 }
308
309 static int ip_tunnel_bind_dev(struct net_device *dev)
310 {
311         struct net_device *tdev = NULL;
312         struct ip_tunnel *tunnel = netdev_priv(dev);
313         const struct iphdr *iph;
314         int hlen = LL_MAX_HEADER;
315         int mtu = ETH_DATA_LEN;
316         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
317
318         iph = &tunnel->parms.iph;
319
320         /* Guess output device to choose reasonable mtu and needed_headroom */
321         if (iph->daddr) {
322                 struct flowi4 fl4;
323                 struct rtable *rt;
324
325                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
326                                  iph->saddr, tunnel->parms.o_key,
327                                  RT_TOS(iph->tos), tunnel->parms.link,
328                                  tunnel->fwmark);
329                 rt = ip_route_output_key(tunnel->net, &fl4);
330
331                 if (!IS_ERR(rt)) {
332                         tdev = rt->dst.dev;
333                         ip_rt_put(rt);
334                 }
335                 if (dev->type != ARPHRD_ETHER)
336                         dev->flags |= IFF_POINTOPOINT;
337
338                 dst_cache_reset(&tunnel->dst_cache);
339         }
340
341         if (!tdev && tunnel->parms.link)
342                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
343
344         if (tdev) {
345                 hlen = tdev->hard_header_len + tdev->needed_headroom;
346                 mtu = tdev->mtu;
347         }
348
349         dev->needed_headroom = t_hlen + hlen;
350         mtu -= (dev->hard_header_len + t_hlen);
351
352         if (mtu < IPV4_MIN_MTU)
353                 mtu = IPV4_MIN_MTU;
354
355         return mtu;
356 }
357
358 static struct ip_tunnel *ip_tunnel_create(struct net *net,
359                                           struct ip_tunnel_net *itn,
360                                           struct ip_tunnel_parm *parms)
361 {
362         struct ip_tunnel *nt;
363         struct net_device *dev;
364         int t_hlen;
365
366         BUG_ON(!itn->fb_tunnel_dev);
367         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
368         if (IS_ERR(dev))
369                 return ERR_CAST(dev);
370
371         dev->mtu = ip_tunnel_bind_dev(dev);
372
373         nt = netdev_priv(dev);
374         t_hlen = nt->hlen + sizeof(struct iphdr);
375         dev->min_mtu = ETH_MIN_MTU;
376         dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
377         ip_tunnel_add(itn, nt);
378         return nt;
379 }
380
381 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
382                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
383                   bool log_ecn_error)
384 {
385         struct pcpu_sw_netstats *tstats;
386         const struct iphdr *iph = ip_hdr(skb);
387         int err;
388
389 #ifdef CONFIG_NET_IPGRE_BROADCAST
390         if (ipv4_is_multicast(iph->daddr)) {
391                 tunnel->dev->stats.multicast++;
392                 skb->pkt_type = PACKET_BROADCAST;
393         }
394 #endif
395
396         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
397              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
398                 tunnel->dev->stats.rx_crc_errors++;
399                 tunnel->dev->stats.rx_errors++;
400                 goto drop;
401         }
402
403         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
404                 if (!(tpi->flags&TUNNEL_SEQ) ||
405                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
406                         tunnel->dev->stats.rx_fifo_errors++;
407                         tunnel->dev->stats.rx_errors++;
408                         goto drop;
409                 }
410                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
411         }
412
413         skb_reset_network_header(skb);
414
415         err = IP_ECN_decapsulate(iph, skb);
416         if (unlikely(err)) {
417                 if (log_ecn_error)
418                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
419                                         &iph->saddr, iph->tos);
420                 if (err > 1) {
421                         ++tunnel->dev->stats.rx_frame_errors;
422                         ++tunnel->dev->stats.rx_errors;
423                         goto drop;
424                 }
425         }
426
427         tstats = this_cpu_ptr(tunnel->dev->tstats);
428         u64_stats_update_begin(&tstats->syncp);
429         tstats->rx_packets++;
430         tstats->rx_bytes += skb->len;
431         u64_stats_update_end(&tstats->syncp);
432
433         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
434
435         if (tunnel->dev->type == ARPHRD_ETHER) {
436                 skb->protocol = eth_type_trans(skb, tunnel->dev);
437                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
438         } else {
439                 skb->dev = tunnel->dev;
440         }
441
442         if (tun_dst)
443                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
444
445         gro_cells_receive(&tunnel->gro_cells, skb);
446         return 0;
447
448 drop:
449         if (tun_dst)
450                 dst_release((struct dst_entry *)tun_dst);
451         kfree_skb(skb);
452         return 0;
453 }
454 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
455
456 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
457                             unsigned int num)
458 {
459         if (num >= MAX_IPTUN_ENCAP_OPS)
460                 return -ERANGE;
461
462         return !cmpxchg((const struct ip_tunnel_encap_ops **)
463                         &iptun_encaps[num],
464                         NULL, ops) ? 0 : -1;
465 }
466 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
467
468 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
469                             unsigned int num)
470 {
471         int ret;
472
473         if (num >= MAX_IPTUN_ENCAP_OPS)
474                 return -ERANGE;
475
476         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
477                        &iptun_encaps[num],
478                        ops, NULL) == ops) ? 0 : -1;
479
480         synchronize_net();
481
482         return ret;
483 }
484 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
485
486 int ip_tunnel_encap_setup(struct ip_tunnel *t,
487                           struct ip_tunnel_encap *ipencap)
488 {
489         int hlen;
490
491         memset(&t->encap, 0, sizeof(t->encap));
492
493         hlen = ip_encap_hlen(ipencap);
494         if (hlen < 0)
495                 return hlen;
496
497         t->encap.type = ipencap->type;
498         t->encap.sport = ipencap->sport;
499         t->encap.dport = ipencap->dport;
500         t->encap.flags = ipencap->flags;
501
502         t->encap_hlen = hlen;
503         t->hlen = t->encap_hlen + t->tun_hlen;
504
505         return 0;
506 }
507 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
508
509 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
510                             struct rtable *rt, __be16 df,
511                             const struct iphdr *inner_iph)
512 {
513         struct ip_tunnel *tunnel = netdev_priv(dev);
514         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
515         int mtu;
516
517         if (df)
518                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
519                                         - sizeof(struct iphdr) - tunnel->hlen;
520         else
521                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
522
523         if (skb_dst(skb))
524                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
525
526         if (skb->protocol == htons(ETH_P_IP)) {
527                 if (!skb_is_gso(skb) &&
528                     (inner_iph->frag_off & htons(IP_DF)) &&
529                     mtu < pkt_size) {
530                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
531                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
532                         return -E2BIG;
533                 }
534         }
535 #if IS_ENABLED(CONFIG_IPV6)
536         else if (skb->protocol == htons(ETH_P_IPV6)) {
537                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
538
539                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
540                            mtu >= IPV6_MIN_MTU) {
541                         if ((tunnel->parms.iph.daddr &&
542                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
543                             rt6->rt6i_dst.plen == 128) {
544                                 rt6->rt6i_flags |= RTF_MODIFIED;
545                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
546                         }
547                 }
548
549                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
550                                         mtu < pkt_size) {
551                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
552                         return -E2BIG;
553                 }
554         }
555 #endif
556         return 0;
557 }
558
559 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
560 {
561         struct ip_tunnel *tunnel = netdev_priv(dev);
562         u32 headroom = sizeof(struct iphdr);
563         struct ip_tunnel_info *tun_info;
564         const struct ip_tunnel_key *key;
565         const struct iphdr *inner_iph;
566         struct rtable *rt;
567         struct flowi4 fl4;
568         __be16 df = 0;
569         u8 tos, ttl;
570
571         tun_info = skb_tunnel_info(skb);
572         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
573                      ip_tunnel_info_af(tun_info) != AF_INET))
574                 goto tx_error;
575         key = &tun_info->key;
576         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
577         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
578         tos = key->tos;
579         if (tos == 1) {
580                 if (skb->protocol == htons(ETH_P_IP))
581                         tos = inner_iph->tos;
582                 else if (skb->protocol == htons(ETH_P_IPV6))
583                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
584         }
585         init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
586                          RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
587         if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
588                 goto tx_error;
589         rt = ip_route_output_key(tunnel->net, &fl4);
590         if (IS_ERR(rt)) {
591                 dev->stats.tx_carrier_errors++;
592                 goto tx_error;
593         }
594         if (rt->dst.dev == dev) {
595                 ip_rt_put(rt);
596                 dev->stats.collisions++;
597                 goto tx_error;
598         }
599         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
600         ttl = key->ttl;
601         if (ttl == 0) {
602                 if (skb->protocol == htons(ETH_P_IP))
603                         ttl = inner_iph->ttl;
604                 else if (skb->protocol == htons(ETH_P_IPV6))
605                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
606                 else
607                         ttl = ip4_dst_hoplimit(&rt->dst);
608         }
609         if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
610                 df = htons(IP_DF);
611         else if (skb->protocol == htons(ETH_P_IP))
612                 df = inner_iph->frag_off & htons(IP_DF);
613         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
614         if (headroom > dev->needed_headroom)
615                 dev->needed_headroom = headroom;
616
617         if (skb_cow_head(skb, dev->needed_headroom)) {
618                 ip_rt_put(rt);
619                 goto tx_dropped;
620         }
621         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
622                       df, !net_eq(tunnel->net, dev_net(dev)));
623         return;
624 tx_error:
625         dev->stats.tx_errors++;
626         goto kfree;
627 tx_dropped:
628         dev->stats.tx_dropped++;
629 kfree:
630         kfree_skb(skb);
631 }
632 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
633
/*
 * ip_tunnel_xmit - encapsulate and transmit a packet through a tunnel.
 *
 * @skb:        inner packet; always consumed (sent or freed)
 * @dev:        tunnel device
 * @tnl_params: outer IPv4 header template; daddr, saddr, tos, ttl and
 *              frag_off are read from it
 * @protocol:   IP protocol number for the outer header (ip_tunnel_encap()
 *              is given a pointer to it and may change it)
 *
 * Failures increment dev->stats.tx_errors (plus a more specific counter
 * where one is set below) and free the skb; a failed skb_cow_head()
 * increments tx_dropped instead.
 */
634 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
635                     const struct iphdr *tnl_params, u8 protocol)
636 {
637         struct ip_tunnel *tunnel = netdev_priv(dev);
638         const struct iphdr *inner_iph;
639         struct flowi4 fl4;
640         u8     tos, ttl;
641         __be16 df;
642         struct rtable *rt;              /* Route to the other host */
643         unsigned int max_headroom;      /* The extra header space needed */
644         __be32 dst;
645         bool connected;
646
647         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
            /* "connected": the tunnel has a fixed remote, so the cached route may be used. */
648         connected = (tunnel->parms.iph.daddr != 0);
649
650         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
651
652         dst = tnl_params->daddr;
653         if (dst == 0) {
654                 /* NBMA tunnel */
                    /* No configured destination: derive the outer daddr from the inner packet's route/neighbour. */
655
656                 if (!skb_dst(skb)) {
657                         dev->stats.tx_fifo_errors++;
658                         goto tx_error;
659                 }
660
661                 if (skb->protocol == htons(ETH_P_IP)) {
662                         rt = skb_rtable(skb);
663                         dst = rt_nexthop(rt, inner_iph->daddr);
664                 }
665 #if IS_ENABLED(CONFIG_IPV6)
666                 else if (skb->protocol == htons(ETH_P_IPV6)) {
667                         const struct in6_addr *addr6;
668                         struct neighbour *neigh;
669                         bool do_tx_error_icmp;
670                         int addr_type;
671
672                         neigh = dst_neigh_lookup(skb_dst(skb),
673                                                  &ipv6_hdr(skb)->daddr);
674                         if (!neigh)
675                                 goto tx_error;
676
677                         addr6 = (const struct in6_addr *)&neigh->primary_key;
678                         addr_type = ipv6_addr_type(addr6);
679
                            /* Unspecified neighbour key: fall back to the packet's own destination. */
680                         if (addr_type == IPV6_ADDR_ANY) {
681                                 addr6 = &ipv6_hdr(skb)->daddr;
682                                 addr_type = ipv6_addr_type(addr6);
683                         }
684
                            /* Only IPv4-compatible addresses yield an outer daddr (their last 32 bits). */
685                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
686                                 do_tx_error_icmp = true;
687                         else {
688                                 do_tx_error_icmp = false;
689                                 dst = addr6->s6_addr32[3];
690                         }
                            /* Drop the neighbour reference before any error exit. */
691                         neigh_release(neigh);
692                         if (do_tx_error_icmp)
693                                 goto tx_error_icmp;
694                 }
695 #endif
696                 else
697                         goto tx_error;
698
                    /* Destination varies per packet, so the route cache must not be used. */
699                 connected = false;
700         }
701
702         tos = tnl_params->tos;
            /* Low bit set in the configured TOS: take the TOS from the inner header (also bypasses the route cache). */
703         if (tos & 0x1) {
704                 tos &= ~0x1;
705                 if (skb->protocol == htons(ETH_P_IP)) {
706                         tos = inner_iph->tos;
707                         connected = false;
708                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
709                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
710                         connected = false;
711                 }
712         }
713
714         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
715                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
716                          tunnel->fwmark);
717
718         if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
719                 goto tx_error;
720
            /* Try the per-tunnel route cache first when the flow is fixed. */
721         rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
722                          NULL;
723
724         if (!rt) {
725                 rt = ip_route_output_key(tunnel->net, &fl4);
726
727                 if (IS_ERR(rt)) {
728                         dev->stats.tx_carrier_errors++;
729                         goto tx_error;
730                 }
731                 if (connected)
732                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
733                                           fl4.saddr);
734         }
735
            /* The outer route points back at this tunnel device: refuse to loop. */
736         if (rt->dst.dev == dev) {
737                 ip_rt_put(rt);
738                 dev->stats.collisions++;
739                 goto tx_error;
740         }
741
742         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
743                 ip_rt_put(rt);
744                 goto tx_error;
745         }
746
            /*
             * NOTE(review): err_count/err_time are presumably set by the tunnel's
             * error handler elsewhere; while the window is open each transmit
             * reports a link failure to the sender and uses up one count.
             */
747         if (tunnel->err_count > 0) {
748                 if (time_before(jiffies,
749                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
750                         tunnel->err_count--;
751
752                         dst_link_failure(skb);
753                 } else
754                         tunnel->err_count = 0;
755         }
756
757         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
758         ttl = tnl_params->ttl;
            /* TTL 0 in the template: inherit from the inner header, else use the route's hop limit. */
759         if (ttl == 0) {
760                 if (skb->protocol == htons(ETH_P_IP))
761                         ttl = inner_iph->ttl;
762 #if IS_ENABLED(CONFIG_IPV6)
763                 else if (skb->protocol == htons(ETH_P_IPV6))
764                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
765 #endif
766                 else
767                         ttl = ip4_dst_hoplimit(&rt->dst);
768         }
769
770         df = tnl_params->frag_off;
            /* Copy the inner DF bit to the outer header unless the tunnel ignores it. */
771         if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
772                 df |= (inner_iph->frag_off&htons(IP_DF));
773
774         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
775                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
            /* needed_headroom only ever grows here. */
776         if (max_headroom > dev->needed_headroom)
777                 dev->needed_headroom = max_headroom;
778
779         if (skb_cow_head(skb, dev->needed_headroom)) {
780                 ip_rt_put(rt);
781                 dev->stats.tx_dropped++;
782                 kfree_skb(skb);
783                 return;
784         }
785
786         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
787                       df, !net_eq(tunnel->net, dev_net(dev)));
788         return;
789
790 #if IS_ENABLED(CONFIG_IPV6)
791 tx_error_icmp:
792         dst_link_failure(skb);
793 #endif
794 tx_error:
795         dev->stats.tx_errors++;
796         kfree_skb(skb);
797 }
798 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
799
800 static void ip_tunnel_update(struct ip_tunnel_net *itn,
801                              struct ip_tunnel *t,
802                              struct net_device *dev,
803                              struct ip_tunnel_parm *p,
804                              bool set_mtu,
805                              __u32 fwmark)
806 {
807         ip_tunnel_del(itn, t);
808         t->parms.iph.saddr = p->iph.saddr;
809         t->parms.iph.daddr = p->iph.daddr;
810         t->parms.i_key = p->i_key;
811         t->parms.o_key = p->o_key;
812         if (dev->type != ARPHRD_ETHER) {
813                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
814                 memcpy(dev->broadcast, &p->iph.daddr, 4);
815         }
816         ip_tunnel_add(itn, t);
817
818         t->parms.iph.ttl = p->iph.ttl;
819         t->parms.iph.tos = p->iph.tos;
820         t->parms.iph.frag_off = p->iph.frag_off;
821
822         if (t->parms.link != p->link || t->fwmark != fwmark) {
823                 int mtu;
824
825                 t->parms.link = p->link;
826                 t->fwmark = fwmark;
827                 mtu = ip_tunnel_bind_dev(dev);
828                 if (set_mtu)
829                         dev->mtu = mtu;
830         }
831         dst_cache_reset(&t->dst_cache);
832         netdev_state_change(dev);
833 }
834
835 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
836 {
837         int err = 0;
838         struct ip_tunnel *t = netdev_priv(dev);
839         struct net *net = t->net;
840         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
841
842         BUG_ON(!itn->fb_tunnel_dev);
843         switch (cmd) {
844         case SIOCGETTUNNEL:
845                 if (dev == itn->fb_tunnel_dev) {
846                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
847                         if (!t)
848                                 t = netdev_priv(dev);
849                 }
850                 memcpy(p, &t->parms, sizeof(*p));
851                 break;
852
853         case SIOCADDTUNNEL:
854         case SIOCCHGTUNNEL:
855                 err = -EPERM;
856                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
857                         goto done;
858                 if (p->iph.ttl)
859                         p->iph.frag_off |= htons(IP_DF);
860                 if (!(p->i_flags & VTI_ISVTI)) {
861                         if (!(p->i_flags & TUNNEL_KEY))
862                                 p->i_key = 0;
863                         if (!(p->o_flags & TUNNEL_KEY))
864                                 p->o_key = 0;
865                 }
866
867                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
868
869                 if (cmd == SIOCADDTUNNEL) {
870                         if (!t) {
871                                 t = ip_tunnel_create(net, itn, p);
872                                 err = PTR_ERR_OR_ZERO(t);
873                                 break;
874                         }
875
876                         err = -EEXIST;
877                         break;
878                 }
879                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
880                         if (t) {
881                                 if (t->dev != dev) {
882                                         err = -EEXIST;
883                                         break;
884                                 }
885                         } else {
886                                 unsigned int nflags = 0;
887
888                                 if (ipv4_is_multicast(p->iph.daddr))
889                                         nflags = IFF_BROADCAST;
890                                 else if (p->iph.daddr)
891                                         nflags = IFF_POINTOPOINT;
892
893                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
894                                         err = -EINVAL;
895                                         break;
896                                 }
897
898                                 t = netdev_priv(dev);
899                         }
900                 }
901
902                 if (t) {
903                         err = 0;
904                         ip_tunnel_update(itn, t, dev, p, true, 0);
905                 } else {
906                         err = -ENOENT;
907                 }
908                 break;
909
910         case SIOCDELTUNNEL:
911                 err = -EPERM;
912                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
913                         goto done;
914
915                 if (dev == itn->fb_tunnel_dev) {
916                         err = -ENOENT;
917                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918                         if (!t)
919                                 goto done;
920                         err = -EPERM;
921                         if (t == netdev_priv(itn->fb_tunnel_dev))
922                                 goto done;
923                         dev = t->dev;
924                 }
925                 unregister_netdevice(dev);
926                 err = 0;
927                 break;
928
929         default:
930                 err = -EINVAL;
931         }
932
933 done:
934         return err;
935 }
936 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
937
938 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
939 {
940         struct ip_tunnel *tunnel = netdev_priv(dev);
941         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
942         int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
943
944         if (new_mtu < ETH_MIN_MTU)
945                 return -EINVAL;
946
947         if (new_mtu > max_mtu) {
948                 if (strict)
949                         return -EINVAL;
950
951                 new_mtu = max_mtu;
952         }
953
954         dev->mtu = new_mtu;
955         return 0;
956 }
957 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
958
959 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
960 {
961         return __ip_tunnel_change_mtu(dev, new_mtu, true);
962 }
963 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
964
965 static void ip_tunnel_dev_free(struct net_device *dev)
966 {
967         struct ip_tunnel *tunnel = netdev_priv(dev);
968
969         gro_cells_destroy(&tunnel->gro_cells);
970         dst_cache_destroy(&tunnel->dst_cache);
971         free_percpu(dev->tstats);
972 }
973
974 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
975 {
976         struct ip_tunnel *tunnel = netdev_priv(dev);
977         struct ip_tunnel_net *itn;
978
979         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
980
981         if (itn->fb_tunnel_dev != dev) {
982                 ip_tunnel_del(itn, netdev_priv(dev));
983                 unregister_netdevice_queue(dev, head);
984         }
985 }
986 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
987
988 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
989 {
990         struct ip_tunnel *tunnel = netdev_priv(dev);
991
992         return tunnel->net;
993 }
994 EXPORT_SYMBOL(ip_tunnel_get_link_net);
995
996 int ip_tunnel_get_iflink(const struct net_device *dev)
997 {
998         struct ip_tunnel *tunnel = netdev_priv(dev);
999
1000         return tunnel->parms.link;
1001 }
1002 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1003
1004 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1005                                   struct rtnl_link_ops *ops, char *devname)
1006 {
1007         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1008         struct ip_tunnel_parm parms;
1009         unsigned int i;
1010
1011         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1012                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1013
1014         if (!ops) {
1015                 itn->fb_tunnel_dev = NULL;
1016                 return 0;
1017         }
1018
1019         memset(&parms, 0, sizeof(parms));
1020         if (devname)
1021                 strlcpy(parms.name, devname, IFNAMSIZ);
1022
1023         rtnl_lock();
1024         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1025         /* FB netdevice is special: we have one, and only one per netns.
1026          * Allowing to move it to another netns is clearly unsafe.
1027          */
1028         if (!IS_ERR(itn->fb_tunnel_dev)) {
1029                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1030                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1031                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1032         }
1033         rtnl_unlock();
1034
1035         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1036 }
1037 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1038
1039 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1040                               struct rtnl_link_ops *ops)
1041 {
1042         struct net *net = dev_net(itn->fb_tunnel_dev);
1043         struct net_device *dev, *aux;
1044         int h;
1045
1046         for_each_netdev_safe(net, dev, aux)
1047                 if (dev->rtnl_link_ops == ops)
1048                         unregister_netdevice_queue(dev, head);
1049
1050         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1051                 struct ip_tunnel *t;
1052                 struct hlist_node *n;
1053                 struct hlist_head *thead = &itn->tunnels[h];
1054
1055                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1056                         /* If dev is in the same netns, it has already
1057                          * been added to the list by the previous loop.
1058                          */
1059                         if (!net_eq(dev_net(t->dev), net))
1060                                 unregister_netdevice_queue(t->dev, head);
1061         }
1062 }
1063
1064 void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
1065                            struct rtnl_link_ops *ops)
1066 {
1067         struct ip_tunnel_net *itn;
1068         struct net *net;
1069         LIST_HEAD(list);
1070
1071         rtnl_lock();
1072         list_for_each_entry(net, net_list, exit_list) {
1073                 itn = net_generic(net, id);
1074                 ip_tunnel_destroy(itn, &list, ops);
1075         }
1076         unregister_netdevice_many(&list);
1077         rtnl_unlock();
1078 }
1079 EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
1080
1081 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1082                       struct ip_tunnel_parm *p, __u32 fwmark)
1083 {
1084         struct ip_tunnel *nt;
1085         struct net *net = dev_net(dev);
1086         struct ip_tunnel_net *itn;
1087         int mtu;
1088         int err;
1089
1090         nt = netdev_priv(dev);
1091         itn = net_generic(net, nt->ip_tnl_net_id);
1092
1093         if (nt->collect_md) {
1094                 if (rtnl_dereference(itn->collect_md_tun))
1095                         return -EEXIST;
1096         } else {
1097                 if (ip_tunnel_find(itn, p, dev->type))
1098                         return -EEXIST;
1099         }
1100
1101         nt->net = net;
1102         nt->parms = *p;
1103         nt->fwmark = fwmark;
1104         err = register_netdevice(dev);
1105         if (err)
1106                 goto out;
1107
1108         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1109                 eth_hw_addr_random(dev);
1110
1111         mtu = ip_tunnel_bind_dev(dev);
1112         if (!tb[IFLA_MTU])
1113                 dev->mtu = mtu;
1114
1115         ip_tunnel_add(itn, nt);
1116 out:
1117         return err;
1118 }
1119 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1120
1121 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1122                          struct ip_tunnel_parm *p, __u32 fwmark)
1123 {
1124         struct ip_tunnel *t;
1125         struct ip_tunnel *tunnel = netdev_priv(dev);
1126         struct net *net = tunnel->net;
1127         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1128
1129         if (dev == itn->fb_tunnel_dev)
1130                 return -EINVAL;
1131
1132         t = ip_tunnel_find(itn, p, dev->type);
1133
1134         if (t) {
1135                 if (t->dev != dev)
1136                         return -EEXIST;
1137         } else {
1138                 t = tunnel;
1139
1140                 if (dev->type != ARPHRD_ETHER) {
1141                         unsigned int nflags = 0;
1142
1143                         if (ipv4_is_multicast(p->iph.daddr))
1144                                 nflags = IFF_BROADCAST;
1145                         else if (p->iph.daddr)
1146                                 nflags = IFF_POINTOPOINT;
1147
1148                         if ((dev->flags ^ nflags) &
1149                             (IFF_POINTOPOINT | IFF_BROADCAST))
1150                                 return -EINVAL;
1151                 }
1152         }
1153
1154         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1155         return 0;
1156 }
1157 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1158
1159 int ip_tunnel_init(struct net_device *dev)
1160 {
1161         struct ip_tunnel *tunnel = netdev_priv(dev);
1162         struct iphdr *iph = &tunnel->parms.iph;
1163         int err;
1164
1165         dev->needs_free_netdev = true;
1166         dev->priv_destructor = ip_tunnel_dev_free;
1167         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1168         if (!dev->tstats)
1169                 return -ENOMEM;
1170
1171         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1172         if (err) {
1173                 free_percpu(dev->tstats);
1174                 return err;
1175         }
1176
1177         err = gro_cells_init(&tunnel->gro_cells, dev);
1178         if (err) {
1179                 dst_cache_destroy(&tunnel->dst_cache);
1180                 free_percpu(dev->tstats);
1181                 return err;
1182         }
1183
1184         tunnel->dev = dev;
1185         tunnel->net = dev_net(dev);
1186         strcpy(tunnel->parms.name, dev->name);
1187         iph->version            = 4;
1188         iph->ihl                = 5;
1189
1190         if (tunnel->collect_md) {
1191                 dev->features |= NETIF_F_NETNS_LOCAL;
1192                 netif_keep_dst(dev);
1193         }
1194         return 0;
1195 }
1196 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1197
1198 void ip_tunnel_uninit(struct net_device *dev)
1199 {
1200         struct ip_tunnel *tunnel = netdev_priv(dev);
1201         struct net *net = tunnel->net;
1202         struct ip_tunnel_net *itn;
1203
1204         itn = net_generic(net, tunnel->ip_tnl_net_id);
1205         /* fb_tunnel_dev will be unregisted in net-exit call. */
1206         if (itn->fb_tunnel_dev != dev)
1207                 ip_tunnel_del(itn, netdev_priv(dev));
1208
1209         dst_cache_reset(&tunnel->dst_cache);
1210 }
1211 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1212
1213 /* Do least required initialization, rest of init is done in tunnel_init call */
1214 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1215 {
1216         struct ip_tunnel *tunnel = netdev_priv(dev);
1217         tunnel->ip_tnl_net_id = net_id;
1218 }
1219 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1220
1221 MODULE_LICENSE("GPL");