ipvlan: protect against concurrent link removal
[sfrench/cifs-2.6.git] / drivers / net / ipvlan / ipvlan_core.c
1 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU General Public License as
5  * published by the Free Software Foundation; either version 2 of
6  * the License, or (at your option) any later version.
7  *
8  */
9
10 #include "ipvlan.h"
11
12 static u32 ipvlan_jhash_secret __read_mostly;
13
14 void ipvlan_init_secret(void)
15 {
16         net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
17 }
18
19 static void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
20                             unsigned int len, bool success, bool mcast)
21 {
22         if (!ipvlan)
23                 return;
24
25         if (likely(success)) {
26                 struct ipvl_pcpu_stats *pcptr;
27
28                 pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
29                 u64_stats_update_begin(&pcptr->syncp);
30                 pcptr->rx_pkts++;
31                 pcptr->rx_bytes += len;
32                 if (mcast)
33                         pcptr->rx_mcast++;
34                 u64_stats_update_end(&pcptr->syncp);
35         } else {
36                 this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
37         }
38 }
39
40 static u8 ipvlan_get_v6_hash(const void *iaddr)
41 {
42         const struct in6_addr *ip6_addr = iaddr;
43
44         return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
45                IPVLAN_HASH_MASK;
46 }
47
48 static u8 ipvlan_get_v4_hash(const void *iaddr)
49 {
50         const struct in_addr *ip4_addr = iaddr;
51
52         return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
53                IPVLAN_HASH_MASK;
54 }
55
56 struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
57                                         const void *iaddr, bool is_v6)
58 {
59         struct ipvl_addr *addr;
60         u8 hash;
61
62         hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
63                ipvlan_get_v4_hash(iaddr);
64         hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode) {
65                 if (is_v6 && addr->atype == IPVL_IPV6 &&
66                     ipv6_addr_equal(&addr->ip6addr, iaddr))
67                         return addr;
68                 else if (!is_v6 && addr->atype == IPVL_IPV4 &&
69                          addr->ip4addr.s_addr ==
70                                 ((struct in_addr *)iaddr)->s_addr)
71                         return addr;
72         }
73         return NULL;
74 }
75
76 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
77 {
78         struct ipvl_port *port = ipvlan->port;
79         u8 hash;
80
81         hash = (addr->atype == IPVL_IPV6) ?
82                ipvlan_get_v6_hash(&addr->ip6addr) :
83                ipvlan_get_v4_hash(&addr->ip4addr);
84         if (hlist_unhashed(&addr->hlnode))
85                 hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
86 }
87
88 void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync)
89 {
90         hlist_del_init_rcu(&addr->hlnode);
91         if (sync)
92                 synchronize_rcu();
93 }
94
95 bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
96 {
97         struct ipvl_port *port = ipvlan->port;
98         struct ipvl_addr *addr;
99
100         list_for_each_entry(addr, &ipvlan->addrs, anode) {
101                 if ((is_v6 && addr->atype == IPVL_IPV6 &&
102                     ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
103                     (!is_v6 && addr->atype == IPVL_IPV4 &&
104                     addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
105                         return true;
106         }
107
108         if (ipvlan_ht_addr_lookup(port, iaddr, is_v6))
109                 return true;
110
111         return false;
112 }
113
114 static void *ipvlan_get_L3_hdr(struct sk_buff *skb, int *type)
115 {
116         void *lyr3h = NULL;
117
118         switch (skb->protocol) {
119         case htons(ETH_P_ARP): {
120                 struct arphdr *arph;
121
122                 if (unlikely(!pskb_may_pull(skb, sizeof(*arph))))
123                         return NULL;
124
125                 arph = arp_hdr(skb);
126                 *type = IPVL_ARP;
127                 lyr3h = arph;
128                 break;
129         }
130         case htons(ETH_P_IP): {
131                 u32 pktlen;
132                 struct iphdr *ip4h;
133
134                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
135                         return NULL;
136
137                 ip4h = ip_hdr(skb);
138                 pktlen = ntohs(ip4h->tot_len);
139                 if (ip4h->ihl < 5 || ip4h->version != 4)
140                         return NULL;
141                 if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
142                         return NULL;
143
144                 *type = IPVL_IPV4;
145                 lyr3h = ip4h;
146                 break;
147         }
148         case htons(ETH_P_IPV6): {
149                 struct ipv6hdr *ip6h;
150
151                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
152                         return NULL;
153
154                 ip6h = ipv6_hdr(skb);
155                 if (ip6h->version != 6)
156                         return NULL;
157
158                 *type = IPVL_IPV6;
159                 lyr3h = ip6h;
160                 /* Only Neighbour Solicitation pkts need different treatment */
161                 if (ipv6_addr_any(&ip6h->saddr) &&
162                     ip6h->nexthdr == NEXTHDR_ICMP) {
163                         *type = IPVL_ICMPV6;
164                         lyr3h = ip6h + 1;
165                 }
166                 break;
167         }
168         default:
169                 return NULL;
170         }
171
172         return lyr3h;
173 }
174
175 unsigned int ipvlan_mac_hash(const unsigned char *addr)
176 {
177         u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
178                                ipvlan_jhash_secret);
179
180         return hash & IPVLAN_MAC_FILTER_MASK;
181 }
182
183 static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
184                                    const struct ipvl_dev *in_dev, bool local)
185 {
186         struct ethhdr *eth = eth_hdr(skb);
187         struct ipvl_dev *ipvlan;
188         struct sk_buff *nskb;
189         unsigned int len;
190         unsigned int mac_hash;
191         int ret;
192
193         if (skb->protocol == htons(ETH_P_PAUSE))
194                 return;
195
196         rcu_read_lock();
197         list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
198                 if (local && (ipvlan == in_dev))
199                         continue;
200
201                 mac_hash = ipvlan_mac_hash(eth->h_dest);
202                 if (!test_bit(mac_hash, ipvlan->mac_filters))
203                         continue;
204
205                 ret = NET_RX_DROP;
206                 len = skb->len + ETH_HLEN;
207                 nskb = skb_clone(skb, GFP_ATOMIC);
208                 if (!nskb)
209                         goto mcast_acct;
210
211                 if (ether_addr_equal(eth->h_dest, ipvlan->phy_dev->broadcast))
212                         nskb->pkt_type = PACKET_BROADCAST;
213                 else
214                         nskb->pkt_type = PACKET_MULTICAST;
215
216                 nskb->dev = ipvlan->dev;
217                 if (local)
218                         ret = dev_forward_skb(ipvlan->dev, nskb);
219                 else
220                         ret = netif_rx(nskb);
221 mcast_acct:
222                 ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
223         }
224         rcu_read_unlock();
225
226         /* Locally generated? ...Forward a copy to the main-device as
227          * well. On the RX side we'll ignore it (wont give it to any
228          * of the virtual devices.
229          */
230         if (local) {
231                 nskb = skb_clone(skb, GFP_ATOMIC);
232                 if (nskb) {
233                         if (ether_addr_equal(eth->h_dest, port->dev->broadcast))
234                                 nskb->pkt_type = PACKET_BROADCAST;
235                         else
236                                 nskb->pkt_type = PACKET_MULTICAST;
237
238                         dev_forward_skb(port->dev, nskb);
239                 }
240         }
241 }
242
243 static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
244                             bool local)
245 {
246         struct ipvl_dev *ipvlan = addr->master;
247         struct net_device *dev = ipvlan->dev;
248         unsigned int len;
249         rx_handler_result_t ret = RX_HANDLER_CONSUMED;
250         bool success = false;
251
252         len = skb->len + ETH_HLEN;
253         if (unlikely(!(dev->flags & IFF_UP))) {
254                 kfree_skb(skb);
255                 goto out;
256         }
257
258         skb = skb_share_check(skb, GFP_ATOMIC);
259         if (!skb)
260                 goto out;
261
262         skb->dev = dev;
263         skb->pkt_type = PACKET_HOST;
264
265         if (local) {
266                 if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
267                         success = true;
268         } else {
269                 ret = RX_HANDLER_ANOTHER;
270                 success = true;
271         }
272
273 out:
274         ipvlan_count_rx(ipvlan, len, success, false);
275         return ret;
276 }
277
278 static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
279                                             void *lyr3h, int addr_type,
280                                             bool use_dest)
281 {
282         struct ipvl_addr *addr = NULL;
283
284         if (addr_type == IPVL_IPV6) {
285                 struct ipv6hdr *ip6h;
286                 struct in6_addr *i6addr;
287
288                 ip6h = (struct ipv6hdr *)lyr3h;
289                 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
290                 addr = ipvlan_ht_addr_lookup(port, i6addr, true);
291         } else if (addr_type == IPVL_ICMPV6) {
292                 struct nd_msg *ndmh;
293                 struct in6_addr *i6addr;
294
295                 /* Make sure that the NeighborSolicitation ICMPv6 packets
296                  * are handled to avoid DAD issue.
297                  */
298                 ndmh = (struct nd_msg *)lyr3h;
299                 if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
300                         i6addr = &ndmh->target;
301                         addr = ipvlan_ht_addr_lookup(port, i6addr, true);
302                 }
303         } else if (addr_type == IPVL_IPV4) {
304                 struct iphdr *ip4h;
305                 __be32 *i4addr;
306
307                 ip4h = (struct iphdr *)lyr3h;
308                 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
309                 addr = ipvlan_ht_addr_lookup(port, i4addr, false);
310         } else if (addr_type == IPVL_ARP) {
311                 struct arphdr *arph;
312                 unsigned char *arp_ptr;
313                 __be32 dip;
314
315                 arph = (struct arphdr *)lyr3h;
316                 arp_ptr = (unsigned char *)(arph + 1);
317                 if (use_dest)
318                         arp_ptr += (2 * port->dev->addr_len) + 4;
319                 else
320                         arp_ptr += port->dev->addr_len;
321
322                 memcpy(&dip, arp_ptr, 4);
323                 addr = ipvlan_ht_addr_lookup(port, &dip, false);
324         }
325
326         return addr;
327 }
328
329 static int ipvlan_process_v4_outbound(struct sk_buff *skb)
330 {
331         const struct iphdr *ip4h = ip_hdr(skb);
332         struct net_device *dev = skb->dev;
333         struct rtable *rt;
334         int err, ret = NET_XMIT_DROP;
335         struct flowi4 fl4 = {
336                 .flowi4_oif = dev->iflink,
337                 .flowi4_tos = RT_TOS(ip4h->tos),
338                 .flowi4_flags = FLOWI_FLAG_ANYSRC,
339                 .daddr = ip4h->daddr,
340                 .saddr = ip4h->saddr,
341         };
342
343         rt = ip_route_output_flow(dev_net(dev), &fl4, NULL);
344         if (IS_ERR(rt))
345                 goto err;
346
347         if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
348                 ip_rt_put(rt);
349                 goto err;
350         }
351         skb_dst_drop(skb);
352         skb_dst_set(skb, &rt->dst);
353         err = ip_local_out(skb);
354         if (unlikely(net_xmit_eval(err)))
355                 dev->stats.tx_errors++;
356         else
357                 ret = NET_XMIT_SUCCESS;
358         goto out;
359 err:
360         dev->stats.tx_errors++;
361         kfree_skb(skb);
362 out:
363         return ret;
364 }
365
366 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
367 {
368         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
369         struct net_device *dev = skb->dev;
370         struct dst_entry *dst;
371         int err, ret = NET_XMIT_DROP;
372         struct flowi6 fl6 = {
373                 .flowi6_iif = skb->dev->ifindex,
374                 .daddr = ip6h->daddr,
375                 .saddr = ip6h->saddr,
376                 .flowi6_flags = FLOWI_FLAG_ANYSRC,
377                 .flowlabel = ip6_flowinfo(ip6h),
378                 .flowi6_mark = skb->mark,
379                 .flowi6_proto = ip6h->nexthdr,
380         };
381
382         dst = ip6_route_output(dev_net(dev), NULL, &fl6);
383         if (dst->error) {
384                 ret = dst->error;
385                 dst_release(dst);
386                 goto err;
387         }
388         skb_dst_drop(skb);
389         skb_dst_set(skb, dst);
390         err = ip6_local_out(skb);
391         if (unlikely(net_xmit_eval(err)))
392                 dev->stats.tx_errors++;
393         else
394                 ret = NET_XMIT_SUCCESS;
395         goto out;
396 err:
397         dev->stats.tx_errors++;
398         kfree_skb(skb);
399 out:
400         return ret;
401 }
402
403 static int ipvlan_process_outbound(struct sk_buff *skb,
404                                    const struct ipvl_dev *ipvlan)
405 {
406         struct ethhdr *ethh = eth_hdr(skb);
407         int ret = NET_XMIT_DROP;
408
409         /* In this mode we dont care about multicast and broadcast traffic */
410         if (is_multicast_ether_addr(ethh->h_dest)) {
411                 pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
412                                     ntohs(skb->protocol));
413                 kfree_skb(skb);
414                 goto out;
415         }
416
417         /* The ipvlan is a pseudo-L2 device, so the packets that we receive
418          * will have L2; which need to discarded and processed further
419          * in the net-ns of the main-device.
420          */
421         if (skb_mac_header_was_set(skb)) {
422                 skb_pull(skb, sizeof(*ethh));
423                 skb->mac_header = (typeof(skb->mac_header))~0U;
424                 skb_reset_network_header(skb);
425         }
426
427         if (skb->protocol == htons(ETH_P_IPV6))
428                 ret = ipvlan_process_v6_outbound(skb);
429         else if (skb->protocol == htons(ETH_P_IP))
430                 ret = ipvlan_process_v4_outbound(skb);
431         else {
432                 pr_warn_ratelimited("Dropped outbound packet type=%x\n",
433                                     ntohs(skb->protocol));
434                 kfree_skb(skb);
435         }
436 out:
437         return ret;
438 }
439
440 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
441 {
442         const struct ipvl_dev *ipvlan = netdev_priv(dev);
443         void *lyr3h;
444         struct ipvl_addr *addr;
445         int addr_type;
446
447         lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
448         if (!lyr3h)
449                 goto out;
450
451         addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
452         if (addr)
453                 return ipvlan_rcv_frame(addr, skb, true);
454
455 out:
456         skb->dev = ipvlan->phy_dev;
457         return ipvlan_process_outbound(skb, ipvlan);
458 }
459
460 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
461 {
462         const struct ipvl_dev *ipvlan = netdev_priv(dev);
463         struct ethhdr *eth = eth_hdr(skb);
464         struct ipvl_addr *addr;
465         void *lyr3h;
466         int addr_type;
467
468         if (ether_addr_equal(eth->h_dest, eth->h_source)) {
469                 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
470                 if (lyr3h) {
471                         addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
472                         if (addr)
473                                 return ipvlan_rcv_frame(addr, skb, true);
474                 }
475                 skb = skb_share_check(skb, GFP_ATOMIC);
476                 if (!skb)
477                         return NET_XMIT_DROP;
478
479                 /* Packet definitely does not belong to any of the
480                  * virtual devices, but the dest is local. So forward
481                  * the skb for the main-dev. At the RX side we just return
482                  * RX_PASS for it to be processed further on the stack.
483                  */
484                 return dev_forward_skb(ipvlan->phy_dev, skb);
485
486         } else if (is_multicast_ether_addr(eth->h_dest)) {
487                 u8 ip_summed = skb->ip_summed;
488
489                 skb->ip_summed = CHECKSUM_UNNECESSARY;
490                 ipvlan_multicast_frame(ipvlan->port, skb, ipvlan, true);
491                 skb->ip_summed = ip_summed;
492         }
493
494         skb->dev = ipvlan->phy_dev;
495         return dev_queue_xmit(skb);
496 }
497
498 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
499 {
500         struct ipvl_dev *ipvlan = netdev_priv(dev);
501         struct ipvl_port *port = ipvlan_port_get_rcu(ipvlan->phy_dev);
502
503         if (!port)
504                 goto out;
505
506         if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
507                 goto out;
508
509         switch(port->mode) {
510         case IPVLAN_MODE_L2:
511                 return ipvlan_xmit_mode_l2(skb, dev);
512         case IPVLAN_MODE_L3:
513                 return ipvlan_xmit_mode_l3(skb, dev);
514         }
515
516         /* Should not reach here */
517         WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
518                           port->mode);
519 out:
520         kfree_skb(skb);
521         return NET_XMIT_DROP;
522 }
523
524 static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
525 {
526         struct ethhdr *eth = eth_hdr(skb);
527         struct ipvl_addr *addr;
528         void *lyr3h;
529         int addr_type;
530
531         if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
532                 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
533                 if (!lyr3h)
534                         return true;
535
536                 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
537                 if (addr)
538                         return false;
539         }
540
541         return true;
542 }
543
544 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
545                                                  struct ipvl_port *port)
546 {
547         void *lyr3h;
548         int addr_type;
549         struct ipvl_addr *addr;
550         struct sk_buff *skb = *pskb;
551         rx_handler_result_t ret = RX_HANDLER_PASS;
552
553         lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
554         if (!lyr3h)
555                 goto out;
556
557         addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
558         if (addr)
559                 ret = ipvlan_rcv_frame(addr, skb, false);
560
561 out:
562         return ret;
563 }
564
565 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
566                                                  struct ipvl_port *port)
567 {
568         struct sk_buff *skb = *pskb;
569         struct ethhdr *eth = eth_hdr(skb);
570         rx_handler_result_t ret = RX_HANDLER_PASS;
571         void *lyr3h;
572         int addr_type;
573
574         if (is_multicast_ether_addr(eth->h_dest)) {
575                 if (ipvlan_external_frame(skb, port))
576                         ipvlan_multicast_frame(port, skb, NULL, false);
577         } else {
578                 struct ipvl_addr *addr;
579
580                 lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
581                 if (!lyr3h)
582                         return ret;
583
584                 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
585                 if (addr)
586                         ret = ipvlan_rcv_frame(addr, skb, false);
587         }
588
589         return ret;
590 }
591
592 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
593 {
594         struct sk_buff *skb = *pskb;
595         struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);
596
597         if (!port)
598                 return RX_HANDLER_PASS;
599
600         switch (port->mode) {
601         case IPVLAN_MODE_L2:
602                 return ipvlan_handle_mode_l2(pskb, port);
603         case IPVLAN_MODE_L3:
604                 return ipvlan_handle_mode_l3(pskb, port);
605         }
606
607         /* Should not reach here */
608         WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
609                           port->mode);
610         kfree_skb(skb);
611         return NET_RX_DROP;
612 }