Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
[sfrench/cifs-2.6.git] / drivers / net / veth.c
1 /*
2  *  drivers/net/veth.c
3  *
4  *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
5  *
6  * Author: Pavel Emelianov <xemul@openvz.org>
7  * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
8  *
9  */
10
11 #include <linux/netdevice.h>
12 #include <linux/slab.h>
13 #include <linux/ethtool.h>
14 #include <linux/etherdevice.h>
15 #include <linux/u64_stats_sync.h>
16
17 #include <net/rtnetlink.h>
18 #include <net/dst.h>
19 #include <net/xfrm.h>
20 #include <linux/veth.h>
21 #include <linux/module.h>
22
23 #define DRV_NAME        "veth"
24 #define DRV_VERSION     "1.0"
25
26 struct pcpu_vstats {
27         u64                     packets;
28         u64                     bytes;
29         struct u64_stats_sync   syncp;
30 };
31
32 struct veth_priv {
33         struct net_device __rcu *peer;
34         atomic64_t              dropped;
35         unsigned                requested_headroom;
36 };
37
38 /*
39  * ethtool interface
40  */
41
42 static struct {
43         const char string[ETH_GSTRING_LEN];
44 } ethtool_stats_keys[] = {
45         { "peer_ifindex" },
46 };
47
48 static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
49 {
50         cmd->supported          = 0;
51         cmd->advertising        = 0;
52         ethtool_cmd_speed_set(cmd, SPEED_10000);
53         cmd->duplex             = DUPLEX_FULL;
54         cmd->port               = PORT_TP;
55         cmd->phy_address        = 0;
56         cmd->transceiver        = XCVR_INTERNAL;
57         cmd->autoneg            = AUTONEG_DISABLE;
58         cmd->maxtxpkt           = 0;
59         cmd->maxrxpkt           = 0;
60         return 0;
61 }
62
63 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
64 {
65         strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
66         strlcpy(info->version, DRV_VERSION, sizeof(info->version));
67 }
68
69 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
70 {
71         switch(stringset) {
72         case ETH_SS_STATS:
73                 memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
74                 break;
75         }
76 }
77
78 static int veth_get_sset_count(struct net_device *dev, int sset)
79 {
80         switch (sset) {
81         case ETH_SS_STATS:
82                 return ARRAY_SIZE(ethtool_stats_keys);
83         default:
84                 return -EOPNOTSUPP;
85         }
86 }
87
88 static void veth_get_ethtool_stats(struct net_device *dev,
89                 struct ethtool_stats *stats, u64 *data)
90 {
91         struct veth_priv *priv = netdev_priv(dev);
92         struct net_device *peer = rtnl_dereference(priv->peer);
93
94         data[0] = peer ? peer->ifindex : 0;
95 }
96
97 static const struct ethtool_ops veth_ethtool_ops = {
98         .get_settings           = veth_get_settings,
99         .get_drvinfo            = veth_get_drvinfo,
100         .get_link               = ethtool_op_get_link,
101         .get_strings            = veth_get_strings,
102         .get_sset_count         = veth_get_sset_count,
103         .get_ethtool_stats      = veth_get_ethtool_stats,
104 };
105
106 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
107 {
108         struct veth_priv *priv = netdev_priv(dev);
109         struct net_device *rcv;
110         int length = skb->len;
111
112         rcu_read_lock();
113         rcv = rcu_dereference(priv->peer);
114         if (unlikely(!rcv)) {
115                 kfree_skb(skb);
116                 goto drop;
117         }
118
119         if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) {
120                 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
121
122                 u64_stats_update_begin(&stats->syncp);
123                 stats->bytes += length;
124                 stats->packets++;
125                 u64_stats_update_end(&stats->syncp);
126         } else {
127 drop:
128                 atomic64_inc(&priv->dropped);
129         }
130         rcu_read_unlock();
131         return NETDEV_TX_OK;
132 }
133
134 /*
135  * general routines
136  */
137
138 static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
139 {
140         struct veth_priv *priv = netdev_priv(dev);
141         int cpu;
142
143         result->packets = 0;
144         result->bytes = 0;
145         for_each_possible_cpu(cpu) {
146                 struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
147                 u64 packets, bytes;
148                 unsigned int start;
149
150                 do {
151                         start = u64_stats_fetch_begin_irq(&stats->syncp);
152                         packets = stats->packets;
153                         bytes = stats->bytes;
154                 } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
155                 result->packets += packets;
156                 result->bytes += bytes;
157         }
158         return atomic64_read(&priv->dropped);
159 }
160
161 static struct rtnl_link_stats64 *veth_get_stats64(struct net_device *dev,
162                                                   struct rtnl_link_stats64 *tot)
163 {
164         struct veth_priv *priv = netdev_priv(dev);
165         struct net_device *peer;
166         struct pcpu_vstats one;
167
168         tot->tx_dropped = veth_stats_one(&one, dev);
169         tot->tx_bytes = one.bytes;
170         tot->tx_packets = one.packets;
171
172         rcu_read_lock();
173         peer = rcu_dereference(priv->peer);
174         if (peer) {
175                 tot->rx_dropped = veth_stats_one(&one, peer);
176                 tot->rx_bytes = one.bytes;
177                 tot->rx_packets = one.packets;
178         }
179         rcu_read_unlock();
180
181         return tot;
182 }
183
/*
 * Fake multicast ability: installed as .ndo_set_rx_mode, intentionally
 * does nothing.
 */
static void veth_set_multicast_list(struct net_device *dev)
{
        /* nothing to program */
}
188
189 static int veth_open(struct net_device *dev)
190 {
191         struct veth_priv *priv = netdev_priv(dev);
192         struct net_device *peer = rtnl_dereference(priv->peer);
193
194         if (!peer)
195                 return -ENOTCONN;
196
197         if (peer->flags & IFF_UP) {
198                 netif_carrier_on(dev);
199                 netif_carrier_on(peer);
200         }
201         return 0;
202 }
203
204 static int veth_close(struct net_device *dev)
205 {
206         struct veth_priv *priv = netdev_priv(dev);
207         struct net_device *peer = rtnl_dereference(priv->peer);
208
209         netif_carrier_off(dev);
210         if (peer)
211                 netif_carrier_off(peer);
212
213         return 0;
214 }
215
216 static int is_valid_veth_mtu(int mtu)
217 {
218         return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
219 }
220
221 static int veth_dev_init(struct net_device *dev)
222 {
223         dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
224         if (!dev->vstats)
225                 return -ENOMEM;
226         return 0;
227 }
228
229 static void veth_dev_free(struct net_device *dev)
230 {
231         free_percpu(dev->vstats);
232         free_netdev(dev);
233 }
234
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Intentionally empty poll hook.
 *
 * A veth device only receives a frame when its peer transmits one, and
 * that hand-over is synchronous, so there can never be pending data to
 * poll for.
 *
 * The hook must exist nevertheless: netpoll uses it to recognize the
 * interface as one that supports polling, which lets bridge devices in
 * virt setups keep using netconsole.
 */
static void veth_poll_controller(struct net_device *dev)
{
}
#endif  /* CONFIG_NET_POLL_CONTROLLER */
249
250 static int veth_get_iflink(const struct net_device *dev)
251 {
252         struct veth_priv *priv = netdev_priv(dev);
253         struct net_device *peer;
254         int iflink;
255
256         rcu_read_lock();
257         peer = rcu_dereference(priv->peer);
258         iflink = peer ? peer->ifindex : 0;
259         rcu_read_unlock();
260
261         return iflink;
262 }
263
264 static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
265 {
266         struct veth_priv *peer_priv, *priv = netdev_priv(dev);
267         struct net_device *peer;
268
269         if (new_hr < 0)
270                 new_hr = 0;
271
272         rcu_read_lock();
273         peer = rcu_dereference(priv->peer);
274         if (unlikely(!peer))
275                 goto out;
276
277         peer_priv = netdev_priv(peer);
278         priv->requested_headroom = new_hr;
279         new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
280         dev->needed_headroom = new_hr;
281         peer->needed_headroom = new_hr;
282
283 out:
284         rcu_read_unlock();
285 }
286
287 static const struct net_device_ops veth_netdev_ops = {
288         .ndo_init            = veth_dev_init,
289         .ndo_open            = veth_open,
290         .ndo_stop            = veth_close,
291         .ndo_start_xmit      = veth_xmit,
292         .ndo_get_stats64     = veth_get_stats64,
293         .ndo_set_rx_mode     = veth_set_multicast_list,
294         .ndo_set_mac_address = eth_mac_addr,
295 #ifdef CONFIG_NET_POLL_CONTROLLER
296         .ndo_poll_controller    = veth_poll_controller,
297 #endif
298         .ndo_get_iflink         = veth_get_iflink,
299         .ndo_features_check     = passthru_features_check,
300         .ndo_set_rx_headroom    = veth_set_rx_headroom,
301 };
302
/* Feature flags that veth_setup() enables on every veth device. */
#define VETH_FEATURES (NETIF_F_SG | \
                       NETIF_F_FRAGLIST | \
                       NETIF_F_HW_CSUM | \
                       NETIF_F_RXCSUM | \
                       NETIF_F_SCTP_CRC | \
                       NETIF_F_HIGHDMA | \
                       NETIF_F_GSO_SOFTWARE | \
                       NETIF_F_GSO_ENCAP_ALL | \
                       NETIF_F_HW_VLAN_CTAG_TX | \
                       NETIF_F_HW_VLAN_CTAG_RX | \
                       NETIF_F_HW_VLAN_STAG_TX | \
                       NETIF_F_HW_VLAN_STAG_RX)
308
309 static void veth_setup(struct net_device *dev)
310 {
311         ether_setup(dev);
312
313         dev->priv_flags &= ~IFF_TX_SKB_SHARING;
314         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
315         dev->priv_flags |= IFF_NO_QUEUE;
316         dev->priv_flags |= IFF_PHONY_HEADROOM;
317
318         dev->netdev_ops = &veth_netdev_ops;
319         dev->ethtool_ops = &veth_ethtool_ops;
320         dev->features |= NETIF_F_LLTX;
321         dev->features |= VETH_FEATURES;
322         dev->vlan_features = dev->features &
323                              ~(NETIF_F_HW_VLAN_CTAG_TX |
324                                NETIF_F_HW_VLAN_STAG_TX |
325                                NETIF_F_HW_VLAN_CTAG_RX |
326                                NETIF_F_HW_VLAN_STAG_RX);
327         dev->destructor = veth_dev_free;
328         dev->max_mtu = ETH_MAX_MTU;
329
330         dev->hw_features = VETH_FEATURES;
331         dev->hw_enc_features = VETH_FEATURES;
332         dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
333 }
334
335 /*
336  * netlink interface
337  */
338
339 static int veth_validate(struct nlattr *tb[], struct nlattr *data[])
340 {
341         if (tb[IFLA_ADDRESS]) {
342                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
343                         return -EINVAL;
344                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
345                         return -EADDRNOTAVAIL;
346         }
347         if (tb[IFLA_MTU]) {
348                 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
349                         return -EINVAL;
350         }
351         return 0;
352 }
353
354 static struct rtnl_link_ops veth_link_ops;
355
356 static int veth_newlink(struct net *src_net, struct net_device *dev,
357                          struct nlattr *tb[], struct nlattr *data[])
358 {
359         int err;
360         struct net_device *peer;
361         struct veth_priv *priv;
362         char ifname[IFNAMSIZ];
363         struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
364         unsigned char name_assign_type;
365         struct ifinfomsg *ifmp;
366         struct net *net;
367
368         /*
369          * create and register peer first
370          */
371         if (data != NULL && data[VETH_INFO_PEER] != NULL) {
372                 struct nlattr *nla_peer;
373
374                 nla_peer = data[VETH_INFO_PEER];
375                 ifmp = nla_data(nla_peer);
376                 err = rtnl_nla_parse_ifla(peer_tb,
377                                           nla_data(nla_peer) + sizeof(struct ifinfomsg),
378                                           nla_len(nla_peer) - sizeof(struct ifinfomsg));
379                 if (err < 0)
380                         return err;
381
382                 err = veth_validate(peer_tb, NULL);
383                 if (err < 0)
384                         return err;
385
386                 tbp = peer_tb;
387         } else {
388                 ifmp = NULL;
389                 tbp = tb;
390         }
391
392         if (tbp[IFLA_IFNAME]) {
393                 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
394                 name_assign_type = NET_NAME_USER;
395         } else {
396                 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
397                 name_assign_type = NET_NAME_ENUM;
398         }
399
400         net = rtnl_link_get_net(src_net, tbp);
401         if (IS_ERR(net))
402                 return PTR_ERR(net);
403
404         peer = rtnl_create_link(net, ifname, name_assign_type,
405                                 &veth_link_ops, tbp);
406         if (IS_ERR(peer)) {
407                 put_net(net);
408                 return PTR_ERR(peer);
409         }
410
411         if (tbp[IFLA_ADDRESS] == NULL)
412                 eth_hw_addr_random(peer);
413
414         if (ifmp && (dev->ifindex != 0))
415                 peer->ifindex = ifmp->ifi_index;
416
417         err = register_netdevice(peer);
418         put_net(net);
419         net = NULL;
420         if (err < 0)
421                 goto err_register_peer;
422
423         netif_carrier_off(peer);
424
425         err = rtnl_configure_link(peer, ifmp);
426         if (err < 0)
427                 goto err_configure_peer;
428
429         /*
430          * register dev last
431          *
432          * note, that since we've registered new device the dev's name
433          * should be re-allocated
434          */
435
436         if (tb[IFLA_ADDRESS] == NULL)
437                 eth_hw_addr_random(dev);
438
439         if (tb[IFLA_IFNAME])
440                 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
441         else
442                 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");
443
444         err = register_netdevice(dev);
445         if (err < 0)
446                 goto err_register_dev;
447
448         netif_carrier_off(dev);
449
450         /*
451          * tie the deviced together
452          */
453
454         priv = netdev_priv(dev);
455         rcu_assign_pointer(priv->peer, peer);
456
457         priv = netdev_priv(peer);
458         rcu_assign_pointer(priv->peer, dev);
459         return 0;
460
461 err_register_dev:
462         /* nothing to do */
463 err_configure_peer:
464         unregister_netdevice(peer);
465         return err;
466
467 err_register_peer:
468         free_netdev(peer);
469         return err;
470 }
471
472 static void veth_dellink(struct net_device *dev, struct list_head *head)
473 {
474         struct veth_priv *priv;
475         struct net_device *peer;
476
477         priv = netdev_priv(dev);
478         peer = rtnl_dereference(priv->peer);
479
480         /* Note : dellink() is called from default_device_exit_batch(),
481          * before a rcu_synchronize() point. The devices are guaranteed
482          * not being freed before one RCU grace period.
483          */
484         RCU_INIT_POINTER(priv->peer, NULL);
485         unregister_netdevice_queue(dev, head);
486
487         if (peer) {
488                 priv = netdev_priv(peer);
489                 RCU_INIT_POINTER(priv->peer, NULL);
490                 unregister_netdevice_queue(peer, head);
491         }
492 }
493
494 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
495         [VETH_INFO_PEER]        = { .len = sizeof(struct ifinfomsg) },
496 };
497
498 static struct net *veth_get_link_net(const struct net_device *dev)
499 {
500         struct veth_priv *priv = netdev_priv(dev);
501         struct net_device *peer = rtnl_dereference(priv->peer);
502
503         return peer ? dev_net(peer) : dev_net(dev);
504 }
505
506 static struct rtnl_link_ops veth_link_ops = {
507         .kind           = DRV_NAME,
508         .priv_size      = sizeof(struct veth_priv),
509         .setup          = veth_setup,
510         .validate       = veth_validate,
511         .newlink        = veth_newlink,
512         .dellink        = veth_dellink,
513         .policy         = veth_policy,
514         .maxtype        = VETH_INFO_MAX,
515         .get_link_net   = veth_get_link_net,
516 };
517
518 /*
519  * init/fini
520  */
521
522 static __init int veth_init(void)
523 {
524         return rtnl_link_register(&veth_link_ops);
525 }
526
527 static __exit void veth_exit(void)
528 {
529         rtnl_link_unregister(&veth_link_ops);
530 }
531
532 module_init(veth_init);
533 module_exit(veth_exit);
534
535 MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
536 MODULE_LICENSE("GPL v2");
537 MODULE_ALIAS_RTNL_LINK(DRV_NAME);