Merge remote-tracking branch 'asoc/fix/wm8962' into asoc-linus
[sfrench/cifs-2.6.git] / net / ipv6 / ip6_output.c
1 /*
2  *      IPv6 output functions
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      Based on linux/net/ipv4/ip_output.c
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  *
15  *      Changes:
16  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
17  *                              extension headers are implemented.
18  *                              route changes now work.
19  *                              ip6_forward does not confuse sniffers.
20  *                              etc.
21  *
22  *      H. von Brand    :       Added missing #include <linux/string.h>
23  *      Imran Patel     :       frag id should be in NBO
24  *      Kazunori MIYAZAWA @USAGI
25  *                      :       add ip6_append_data and related functions
26  *                              for datagram xmit
27  */
28
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
41
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
44
45 #include <net/sock.h>
46 #include <net/snmp.h>
47
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
58
59 static int ip6_finish_output2(struct sk_buff *skb)
60 {
61         struct dst_entry *dst = skb_dst(skb);
62         struct net_device *dev = dst->dev;
63         struct neighbour *neigh;
64         struct in6_addr *nexthop;
65         int ret;
66
67         skb->protocol = htons(ETH_P_IPV6);
68         skb->dev = dev;
69
70         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
71                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
72
73                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
74                     ((mroute6_socket(dev_net(dev), skb) &&
75                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
77                                          &ipv6_hdr(skb)->saddr))) {
78                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
79
80                         /* Do not check for IFF_ALLMULTI; multicast routing
81                            is not supported in any case.
82                          */
83                         if (newskb)
84                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
85                                         newskb, NULL, newskb->dev,
86                                         dev_loopback_xmit);
87
88                         if (ipv6_hdr(skb)->hop_limit == 0) {
89                                 IP6_INC_STATS(dev_net(dev), idev,
90                                               IPSTATS_MIB_OUTDISCARDS);
91                                 kfree_skb(skb);
92                                 return 0;
93                         }
94                 }
95
96                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
97                                 skb->len);
98
99                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
100                     IPV6_ADDR_SCOPE_NODELOCAL &&
101                     !(dev->flags & IFF_LOOPBACK)) {
102                         kfree_skb(skb);
103                         return 0;
104                 }
105         }
106
107         rcu_read_lock_bh();
108         nexthop = rt6_nexthop((struct rt6_info *)dst);
109         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
110         if (unlikely(!neigh))
111                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
112         if (!IS_ERR(neigh)) {
113                 ret = dst_neigh_output(dst, neigh, skb);
114                 rcu_read_unlock_bh();
115                 return ret;
116         }
117         rcu_read_unlock_bh();
118
119         IP6_INC_STATS_BH(dev_net(dst->dev),
120                          ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121         kfree_skb(skb);
122         return -EINVAL;
123 }
124
125 static int ip6_finish_output(struct sk_buff *skb)
126 {
127         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128             dst_allfrag(skb_dst(skb)))
129                 return ip6_fragment(skb, ip6_finish_output2);
130         else
131                 return ip6_finish_output2(skb);
132 }
133
134 int ip6_output(struct sk_buff *skb)
135 {
136         struct net_device *dev = skb_dst(skb)->dev;
137         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
138         if (unlikely(idev->cnf.disable_ipv6)) {
139                 IP6_INC_STATS(dev_net(dev), idev,
140                               IPSTATS_MIB_OUTDISCARDS);
141                 kfree_skb(skb);
142                 return 0;
143         }
144
145         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
146                             ip6_finish_output,
147                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
148 }
149
150 /*
151  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
152  */
153
154 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
155              struct ipv6_txoptions *opt, int tclass)
156 {
157         struct net *net = sock_net(sk);
158         struct ipv6_pinfo *np = inet6_sk(sk);
159         struct in6_addr *first_hop = &fl6->daddr;
160         struct dst_entry *dst = skb_dst(skb);
161         struct ipv6hdr *hdr;
162         u8  proto = fl6->flowi6_proto;
163         int seg_len = skb->len;
164         int hlimit = -1;
165         u32 mtu;
166
167         if (opt) {
168                 unsigned int head_room;
169
170                 /* First: exthdrs may take lots of space (~8K for now)
171                    MAX_HEADER is not enough.
172                  */
173                 head_room = opt->opt_nflen + opt->opt_flen;
174                 seg_len += head_room;
175                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
176
177                 if (skb_headroom(skb) < head_room) {
178                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
179                         if (skb2 == NULL) {
180                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
181                                               IPSTATS_MIB_OUTDISCARDS);
182                                 kfree_skb(skb);
183                                 return -ENOBUFS;
184                         }
185                         consume_skb(skb);
186                         skb = skb2;
187                         skb_set_owner_w(skb, sk);
188                 }
189                 if (opt->opt_flen)
190                         ipv6_push_frag_opts(skb, opt, &proto);
191                 if (opt->opt_nflen)
192                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
193         }
194
195         skb_push(skb, sizeof(struct ipv6hdr));
196         skb_reset_network_header(skb);
197         hdr = ipv6_hdr(skb);
198
199         /*
200          *      Fill in the IPv6 header
201          */
202         if (np)
203                 hlimit = np->hop_limit;
204         if (hlimit < 0)
205                 hlimit = ip6_dst_hoplimit(dst);
206
207         ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
208
209         hdr->payload_len = htons(seg_len);
210         hdr->nexthdr = proto;
211         hdr->hop_limit = hlimit;
212
213         hdr->saddr = fl6->saddr;
214         hdr->daddr = *first_hop;
215
216         skb->protocol = htons(ETH_P_IPV6);
217         skb->priority = sk->sk_priority;
218         skb->mark = sk->sk_mark;
219
220         mtu = dst_mtu(dst);
221         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
222                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
223                               IPSTATS_MIB_OUT, skb->len);
224                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
225                                dst->dev, dst_output);
226         }
227
228         skb->dev = dst->dev;
229         ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
230         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
231         kfree_skb(skb);
232         return -EMSGSIZE;
233 }
234
235 EXPORT_SYMBOL(ip6_xmit);
236
237 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
238 {
239         struct ip6_ra_chain *ra;
240         struct sock *last = NULL;
241
242         read_lock(&ip6_ra_lock);
243         for (ra = ip6_ra_chain; ra; ra = ra->next) {
244                 struct sock *sk = ra->sk;
245                 if (sk && ra->sel == sel &&
246                     (!sk->sk_bound_dev_if ||
247                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
248                         if (last) {
249                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
250                                 if (skb2)
251                                         rawv6_rcv(last, skb2);
252                         }
253                         last = sk;
254                 }
255         }
256
257         if (last) {
258                 rawv6_rcv(last, skb);
259                 read_unlock(&ip6_ra_lock);
260                 return 1;
261         }
262         read_unlock(&ip6_ra_lock);
263         return 0;
264 }
265
266 static int ip6_forward_proxy_check(struct sk_buff *skb)
267 {
268         struct ipv6hdr *hdr = ipv6_hdr(skb);
269         u8 nexthdr = hdr->nexthdr;
270         __be16 frag_off;
271         int offset;
272
273         if (ipv6_ext_hdr(nexthdr)) {
274                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
275                 if (offset < 0)
276                         return 0;
277         } else
278                 offset = sizeof(struct ipv6hdr);
279
280         if (nexthdr == IPPROTO_ICMPV6) {
281                 struct icmp6hdr *icmp6;
282
283                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
284                                          offset + 1 - skb->data)))
285                         return 0;
286
287                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
288
289                 switch (icmp6->icmp6_type) {
290                 case NDISC_ROUTER_SOLICITATION:
291                 case NDISC_ROUTER_ADVERTISEMENT:
292                 case NDISC_NEIGHBOUR_SOLICITATION:
293                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
294                 case NDISC_REDIRECT:
295                         /* For reaction involving unicast neighbor discovery
296                          * message destined to the proxied address, pass it to
297                          * input function.
298                          */
299                         return 1;
300                 default:
301                         break;
302                 }
303         }
304
305         /*
306          * The proxying router can't forward traffic sent to a link-local
307          * address, so signal the sender and discard the packet. This
308          * behavior is clarified by the MIPv6 specification.
309          */
310         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
311                 dst_link_failure(skb);
312                 return -1;
313         }
314
315         return 0;
316 }
317
/*
 *	Continuation invoked once the FORWARD netfilter hook accepted
 *	the packet: pass it on to the output function of its route.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
322
323 int ip6_forward(struct sk_buff *skb)
324 {
325         struct dst_entry *dst = skb_dst(skb);
326         struct ipv6hdr *hdr = ipv6_hdr(skb);
327         struct inet6_skb_parm *opt = IP6CB(skb);
328         struct net *net = dev_net(dst->dev);
329         u32 mtu;
330
331         if (net->ipv6.devconf_all->forwarding == 0)
332                 goto error;
333
334         if (skb_warn_if_lro(skb))
335                 goto drop;
336
337         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
338                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
339                 goto drop;
340         }
341
342         if (skb->pkt_type != PACKET_HOST)
343                 goto drop;
344
345         skb_forward_csum(skb);
346
347         /*
348          *      We DO NOT make any processing on
349          *      RA packets, pushing them to user level AS IS
350          *      without ane WARRANTY that application will be able
351          *      to interpret them. The reason is that we
352          *      cannot make anything clever here.
353          *
354          *      We are not end-node, so that if packet contains
355          *      AH/ESP, we cannot make anything.
356          *      Defragmentation also would be mistake, RA packets
357          *      cannot be fragmented, because there is no warranty
358          *      that different fragments will go along one path. --ANK
359          */
360         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
361                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
362                         return 0;
363         }
364
365         /*
366          *      check and decrement ttl
367          */
368         if (hdr->hop_limit <= 1) {
369                 /* Force OUTPUT device used as source address */
370                 skb->dev = dst->dev;
371                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
372                 IP6_INC_STATS_BH(net,
373                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
374
375                 kfree_skb(skb);
376                 return -ETIMEDOUT;
377         }
378
379         /* XXX: idev->cnf.proxy_ndp? */
380         if (net->ipv6.devconf_all->proxy_ndp &&
381             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
382                 int proxied = ip6_forward_proxy_check(skb);
383                 if (proxied > 0)
384                         return ip6_input(skb);
385                 else if (proxied < 0) {
386                         IP6_INC_STATS(net, ip6_dst_idev(dst),
387                                       IPSTATS_MIB_INDISCARDS);
388                         goto drop;
389                 }
390         }
391
392         if (!xfrm6_route_forward(skb)) {
393                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
394                 goto drop;
395         }
396         dst = skb_dst(skb);
397
398         /* IPv6 specs say nothing about it, but it is clear that we cannot
399            send redirects to source routed frames.
400            We don't send redirects to frames decapsulated from IPsec.
401          */
402         if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
403                 struct in6_addr *target = NULL;
404                 struct inet_peer *peer;
405                 struct rt6_info *rt;
406
407                 /*
408                  *      incoming and outgoing devices are the same
409                  *      send a redirect.
410                  */
411
412                 rt = (struct rt6_info *) dst;
413                 if (rt->rt6i_flags & RTF_GATEWAY)
414                         target = &rt->rt6i_gateway;
415                 else
416                         target = &hdr->daddr;
417
418                 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
419
420                 /* Limit redirects both by destination (here)
421                    and by source (inside ndisc_send_redirect)
422                  */
423                 if (inet_peer_xrlim_allow(peer, 1*HZ))
424                         ndisc_send_redirect(skb, target);
425                 if (peer)
426                         inet_putpeer(peer);
427         } else {
428                 int addrtype = ipv6_addr_type(&hdr->saddr);
429
430                 /* This check is security critical. */
431                 if (addrtype == IPV6_ADDR_ANY ||
432                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
433                         goto error;
434                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
435                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
436                                     ICMPV6_NOT_NEIGHBOUR, 0);
437                         goto error;
438                 }
439         }
440
441         mtu = dst_mtu(dst);
442         if (mtu < IPV6_MIN_MTU)
443                 mtu = IPV6_MIN_MTU;
444
445         if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
446             (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
447                 /* Again, force OUTPUT device used as source address */
448                 skb->dev = dst->dev;
449                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
450                 IP6_INC_STATS_BH(net,
451                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
452                 IP6_INC_STATS_BH(net,
453                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
454                 kfree_skb(skb);
455                 return -EMSGSIZE;
456         }
457
458         if (skb_cow(skb, dst->dev->hard_header_len)) {
459                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
460                 goto drop;
461         }
462
463         hdr = ipv6_hdr(skb);
464
465         /* Mangling hops number delayed to point after skb COW */
466
467         hdr->hop_limit--;
468
469         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
470         IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
471         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
472                        ip6_forward_finish);
473
474 error:
475         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
476 drop:
477         kfree_skb(skb);
478         return -EINVAL;
479 }
480
481 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
482 {
483         to->pkt_type = from->pkt_type;
484         to->priority = from->priority;
485         to->protocol = from->protocol;
486         skb_dst_drop(to);
487         skb_dst_set(to, dst_clone(skb_dst(from)));
488         to->dev = from->dev;
489         to->mark = from->mark;
490
491 #ifdef CONFIG_NET_SCHED
492         to->tc_index = from->tc_index;
493 #endif
494         nf_copy(to, from);
495 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
496         to->nf_trace = from->nf_trace;
497 #endif
498         skb_copy_secmark(to, from);
499 }
500
501 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
502 {
503         struct sk_buff *frag;
504         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
505         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
506         struct ipv6hdr *tmp_hdr;
507         struct frag_hdr *fh;
508         unsigned int mtu, hlen, left, len;
509         int hroom, troom;
510         __be32 frag_id = 0;
511         int ptr, offset = 0, err=0;
512         u8 *prevhdr, nexthdr = 0;
513         struct net *net = dev_net(skb_dst(skb)->dev);
514
515         hlen = ip6_find_1stfragopt(skb, &prevhdr);
516         nexthdr = *prevhdr;
517
518         mtu = ip6_skb_dst_mtu(skb);
519
520         /* We must not fragment if the socket is set to force MTU discovery
521          * or if the skb it not generated by a local socket.
522          */
523         if (unlikely(!skb->local_df && skb->len > mtu) ||
524                      (IP6CB(skb)->frag_max_size &&
525                       IP6CB(skb)->frag_max_size > mtu)) {
526                 if (skb->sk && dst_allfrag(skb_dst(skb)))
527                         sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
528
529                 skb->dev = skb_dst(skb)->dev;
530                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
531                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
532                               IPSTATS_MIB_FRAGFAILS);
533                 kfree_skb(skb);
534                 return -EMSGSIZE;
535         }
536
537         if (np && np->frag_size < mtu) {
538                 if (np->frag_size)
539                         mtu = np->frag_size;
540         }
541         mtu -= hlen + sizeof(struct frag_hdr);
542
543         if (skb_has_frag_list(skb)) {
544                 int first_len = skb_pagelen(skb);
545                 struct sk_buff *frag2;
546
547                 if (first_len - hlen > mtu ||
548                     ((first_len - hlen) & 7) ||
549                     skb_cloned(skb))
550                         goto slow_path;
551
552                 skb_walk_frags(skb, frag) {
553                         /* Correct geometry. */
554                         if (frag->len > mtu ||
555                             ((frag->len & 7) && frag->next) ||
556                             skb_headroom(frag) < hlen)
557                                 goto slow_path_clean;
558
559                         /* Partially cloned skb? */
560                         if (skb_shared(frag))
561                                 goto slow_path_clean;
562
563                         BUG_ON(frag->sk);
564                         if (skb->sk) {
565                                 frag->sk = skb->sk;
566                                 frag->destructor = sock_wfree;
567                         }
568                         skb->truesize -= frag->truesize;
569                 }
570
571                 err = 0;
572                 offset = 0;
573                 frag = skb_shinfo(skb)->frag_list;
574                 skb_frag_list_init(skb);
575                 /* BUILD HEADER */
576
577                 *prevhdr = NEXTHDR_FRAGMENT;
578                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
579                 if (!tmp_hdr) {
580                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
581                                       IPSTATS_MIB_FRAGFAILS);
582                         return -ENOMEM;
583                 }
584
585                 __skb_pull(skb, hlen);
586                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
587                 __skb_push(skb, hlen);
588                 skb_reset_network_header(skb);
589                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
590
591                 ipv6_select_ident(fh, rt);
592                 fh->nexthdr = nexthdr;
593                 fh->reserved = 0;
594                 fh->frag_off = htons(IP6_MF);
595                 frag_id = fh->identification;
596
597                 first_len = skb_pagelen(skb);
598                 skb->data_len = first_len - skb_headlen(skb);
599                 skb->len = first_len;
600                 ipv6_hdr(skb)->payload_len = htons(first_len -
601                                                    sizeof(struct ipv6hdr));
602
603                 dst_hold(&rt->dst);
604
605                 for (;;) {
606                         /* Prepare header of the next frame,
607                          * before previous one went down. */
608                         if (frag) {
609                                 frag->ip_summed = CHECKSUM_NONE;
610                                 skb_reset_transport_header(frag);
611                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
612                                 __skb_push(frag, hlen);
613                                 skb_reset_network_header(frag);
614                                 memcpy(skb_network_header(frag), tmp_hdr,
615                                        hlen);
616                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
617                                 fh->nexthdr = nexthdr;
618                                 fh->reserved = 0;
619                                 fh->frag_off = htons(offset);
620                                 if (frag->next != NULL)
621                                         fh->frag_off |= htons(IP6_MF);
622                                 fh->identification = frag_id;
623                                 ipv6_hdr(frag)->payload_len =
624                                                 htons(frag->len -
625                                                       sizeof(struct ipv6hdr));
626                                 ip6_copy_metadata(frag, skb);
627                         }
628
629                         err = output(skb);
630                         if(!err)
631                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
632                                               IPSTATS_MIB_FRAGCREATES);
633
634                         if (err || !frag)
635                                 break;
636
637                         skb = frag;
638                         frag = skb->next;
639                         skb->next = NULL;
640                 }
641
642                 kfree(tmp_hdr);
643
644                 if (err == 0) {
645                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
646                                       IPSTATS_MIB_FRAGOKS);
647                         ip6_rt_put(rt);
648                         return 0;
649                 }
650
651                 while (frag) {
652                         skb = frag->next;
653                         kfree_skb(frag);
654                         frag = skb;
655                 }
656
657                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
658                               IPSTATS_MIB_FRAGFAILS);
659                 ip6_rt_put(rt);
660                 return err;
661
662 slow_path_clean:
663                 skb_walk_frags(skb, frag2) {
664                         if (frag2 == frag)
665                                 break;
666                         frag2->sk = NULL;
667                         frag2->destructor = NULL;
668                         skb->truesize += frag2->truesize;
669                 }
670         }
671
672 slow_path:
673         if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
674             skb_checksum_help(skb))
675                 goto fail;
676
677         left = skb->len - hlen;         /* Space per frame */
678         ptr = hlen;                     /* Where to start from */
679
680         /*
681          *      Fragment the datagram.
682          */
683
684         *prevhdr = NEXTHDR_FRAGMENT;
685         hroom = LL_RESERVED_SPACE(rt->dst.dev);
686         troom = rt->dst.dev->needed_tailroom;
687
688         /*
689          *      Keep copying data until we run out.
690          */
691         while(left > 0) {
692                 len = left;
693                 /* IF: it doesn't fit, use 'mtu' - the data space left */
694                 if (len > mtu)
695                         len = mtu;
696                 /* IF: we are not sending up to and including the packet end
697                    then align the next start on an eight byte boundary */
698                 if (len < left) {
699                         len &= ~7;
700                 }
701                 /*
702                  *      Allocate buffer.
703                  */
704
705                 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
706                                       hroom + troom, GFP_ATOMIC)) == NULL) {
707                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
708                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
709                                       IPSTATS_MIB_FRAGFAILS);
710                         err = -ENOMEM;
711                         goto fail;
712                 }
713
714                 /*
715                  *      Set up data on packet
716                  */
717
718                 ip6_copy_metadata(frag, skb);
719                 skb_reserve(frag, hroom);
720                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
721                 skb_reset_network_header(frag);
722                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
723                 frag->transport_header = (frag->network_header + hlen +
724                                           sizeof(struct frag_hdr));
725
726                 /*
727                  *      Charge the memory for the fragment to any owner
728                  *      it might possess
729                  */
730                 if (skb->sk)
731                         skb_set_owner_w(frag, skb->sk);
732
733                 /*
734                  *      Copy the packet header into the new buffer.
735                  */
736                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
737
738                 /*
739                  *      Build fragment header.
740                  */
741                 fh->nexthdr = nexthdr;
742                 fh->reserved = 0;
743                 if (!frag_id) {
744                         ipv6_select_ident(fh, rt);
745                         frag_id = fh->identification;
746                 } else
747                         fh->identification = frag_id;
748
749                 /*
750                  *      Copy a block of the IP datagram.
751                  */
752                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
753                         BUG();
754                 left -= len;
755
756                 fh->frag_off = htons(offset);
757                 if (left > 0)
758                         fh->frag_off |= htons(IP6_MF);
759                 ipv6_hdr(frag)->payload_len = htons(frag->len -
760                                                     sizeof(struct ipv6hdr));
761
762                 ptr += len;
763                 offset += len;
764
765                 /*
766                  *      Put this fragment into the sending queue.
767                  */
768                 err = output(frag);
769                 if (err)
770                         goto fail;
771
772                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
773                               IPSTATS_MIB_FRAGCREATES);
774         }
775         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
776                       IPSTATS_MIB_FRAGOKS);
777         consume_skb(skb);
778         return err;
779
780 fail:
781         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
782                       IPSTATS_MIB_FRAGFAILS);
783         kfree_skb(skb);
784         return err;
785 }
786
787 static inline int ip6_rt_check(const struct rt6key *rt_key,
788                                const struct in6_addr *fl_addr,
789                                const struct in6_addr *addr_cache)
790 {
791         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
792                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
793 }
794
795 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
796                                           struct dst_entry *dst,
797                                           const struct flowi6 *fl6)
798 {
799         struct ipv6_pinfo *np = inet6_sk(sk);
800         struct rt6_info *rt;
801
802         if (!dst)
803                 goto out;
804
805         if (dst->ops->family != AF_INET6) {
806                 dst_release(dst);
807                 return NULL;
808         }
809
810         rt = (struct rt6_info *)dst;
811         /* Yes, checking route validity in not connected
812          * case is not very simple. Take into account,
813          * that we do not support routing by source, TOS,
814          * and MSG_DONTROUTE            --ANK (980726)
815          *
816          * 1. ip6_rt_check(): If route was host route,
817          *    check that cached destination is current.
818          *    If it is network route, we still may
819          *    check its validity using saved pointer
820          *    to the last used address: daddr_cache.
821          *    We do not want to save whole address now,
822          *    (because main consumer of this service
823          *    is tcp, which has not this problem),
824          *    so that the last trick works only on connected
825          *    sockets.
826          * 2. oif also should be the same.
827          */
828         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
829 #ifdef CONFIG_IPV6_SUBTREES
830             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
831 #endif
832             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
833                 dst_release(dst);
834                 dst = NULL;
835         }
836
837 out:
838         return dst;
839 }
840
841 static int ip6_dst_lookup_tail(struct sock *sk,
842                                struct dst_entry **dst, struct flowi6 *fl6)
843 {
844         struct net *net = sock_net(sk);
845 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
846         struct neighbour *n;
847         struct rt6_info *rt;
848 #endif
849         int err;
850
851         if (*dst == NULL)
852                 *dst = ip6_route_output(net, sk, fl6);
853
854         if ((err = (*dst)->error))
855                 goto out_err_release;
856
857         if (ipv6_addr_any(&fl6->saddr)) {
858                 struct rt6_info *rt = (struct rt6_info *) *dst;
859                 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
860                                           sk ? inet6_sk(sk)->srcprefs : 0,
861                                           &fl6->saddr);
862                 if (err)
863                         goto out_err_release;
864         }
865
866 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
867         /*
868          * Here if the dst entry we've looked up
869          * has a neighbour entry that is in the INCOMPLETE
870          * state and the src address from the flow is
871          * marked as OPTIMISTIC, we release the found
872          * dst entry and replace it instead with the
873          * dst entry of the nexthop router
874          */
875         rt = (struct rt6_info *) *dst;
876         rcu_read_lock_bh();
877         n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
878         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
879         rcu_read_unlock_bh();
880
881         if (err) {
882                 struct inet6_ifaddr *ifp;
883                 struct flowi6 fl_gw6;
884                 int redirect;
885
886                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
887                                       (*dst)->dev, 1);
888
889                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
890                 if (ifp)
891                         in6_ifa_put(ifp);
892
893                 if (redirect) {
894                         /*
895                          * We need to get the dst entry for the
896                          * default router instead
897                          */
898                         dst_release(*dst);
899                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
900                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
901                         *dst = ip6_route_output(net, sk, &fl_gw6);
902                         if ((err = (*dst)->error))
903                                 goto out_err_release;
904                 }
905         }
906 #endif
907
908         return 0;
909
910 out_err_release:
911         if (err == -ENETUNREACH)
912                 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
913         dst_release(*dst);
914         *dst = NULL;
915         return err;
916 }
917
918 /**
919  *      ip6_dst_lookup - perform route lookup on flow
920  *      @sk: socket which provides route info
921  *      @dst: pointer to dst_entry * for result
922  *      @fl6: flow to lookup
923  *
924  *      This function performs a route lookup on the given flow.
925  *
926  *      It returns zero on success, or a standard errno code on error.
927  */
928 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
929 {
930         *dst = NULL;
931         return ip6_dst_lookup_tail(sk, dst, fl6);
932 }
933 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
934
935 /**
936  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
937  *      @sk: socket which provides route info
938  *      @fl6: flow to lookup
939  *      @final_dst: final destination address for ipsec lookup
940  *      @can_sleep: we are in a sleepable context
941  *
942  *      This function performs a route lookup on the given flow.
943  *
944  *      It returns a valid dst pointer on success, or a pointer encoded
945  *      error code.
946  */
947 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
948                                       const struct in6_addr *final_dst,
949                                       bool can_sleep)
950 {
951         struct dst_entry *dst = NULL;
952         int err;
953
954         err = ip6_dst_lookup_tail(sk, &dst, fl6);
955         if (err)
956                 return ERR_PTR(err);
957         if (final_dst)
958                 fl6->daddr = *final_dst;
959         if (can_sleep)
960                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
961
962         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
963 }
964 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
965
966 /**
967  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
968  *      @sk: socket which provides the dst cache and route info
969  *      @fl6: flow to lookup
970  *      @final_dst: final destination address for ipsec lookup
971  *      @can_sleep: we are in a sleepable context
972  *
973  *      This function performs a route lookup on the given flow with the
974  *      possibility of using the cached route in the socket if it is valid.
975  *      It will take the socket dst lock when operating on the dst cache.
976  *      As a result, this function can only be used in process context.
977  *
978  *      It returns a valid dst pointer on success, or a pointer encoded
979  *      error code.
980  */
981 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
982                                          const struct in6_addr *final_dst,
983                                          bool can_sleep)
984 {
985         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
986         int err;
987
988         dst = ip6_sk_dst_check(sk, dst, fl6);
989
990         err = ip6_dst_lookup_tail(sk, &dst, fl6);
991         if (err)
992                 return ERR_PTR(err);
993         if (final_dst)
994                 fl6->daddr = *final_dst;
995         if (can_sleep)
996                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
997
998         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
999 }
1000 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1001
1002 static inline int ip6_ufo_append_data(struct sock *sk,
1003                         int getfrag(void *from, char *to, int offset, int len,
1004                         int odd, struct sk_buff *skb),
1005                         void *from, int length, int hh_len, int fragheaderlen,
1006                         int transhdrlen, int mtu,unsigned int flags,
1007                         struct rt6_info *rt)
1008
1009 {
1010         struct sk_buff *skb;
1011         struct frag_hdr fhdr;
1012         int err;
1013
1014         /* There is support for UDP large send offload by network
1015          * device, so create one single skb packet containing complete
1016          * udp datagram
1017          */
1018         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1019                 skb = sock_alloc_send_skb(sk,
1020                         hh_len + fragheaderlen + transhdrlen + 20,
1021                         (flags & MSG_DONTWAIT), &err);
1022                 if (skb == NULL)
1023                         return err;
1024
1025                 /* reserve space for Hardware header */
1026                 skb_reserve(skb, hh_len);
1027
1028                 /* create space for UDP/IP header */
1029                 skb_put(skb,fragheaderlen + transhdrlen);
1030
1031                 /* initialize network header pointer */
1032                 skb_reset_network_header(skb);
1033
1034                 /* initialize protocol header pointer */
1035                 skb->transport_header = skb->network_header + fragheaderlen;
1036
1037                 skb->protocol = htons(ETH_P_IPV6);
1038                 skb->csum = 0;
1039
1040                 __skb_queue_tail(&sk->sk_write_queue, skb);
1041         } else if (skb_is_gso(skb)) {
1042                 goto append;
1043         }
1044
1045         skb->ip_summed = CHECKSUM_PARTIAL;
1046         /* Specify the length of each IPv6 datagram fragment.
1047          * It has to be a multiple of 8.
1048          */
1049         skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1050                                      sizeof(struct frag_hdr)) & ~7;
1051         skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1052         ipv6_select_ident(&fhdr, rt);
1053         skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1054
1055 append:
1056         return skb_append_datato_frags(sk, skb, getfrag, from,
1057                                        (length - transhdrlen));
1058 }
1059
1060 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1061                                                gfp_t gfp)
1062 {
1063         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1064 }
1065
1066 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1067                                                 gfp_t gfp)
1068 {
1069         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1070 }
1071
1072 static void ip6_append_data_mtu(unsigned int *mtu,
1073                                 int *maxfraglen,
1074                                 unsigned int fragheaderlen,
1075                                 struct sk_buff *skb,
1076                                 struct rt6_info *rt,
1077                                 bool pmtuprobe)
1078 {
1079         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1080                 if (skb == NULL) {
1081                         /* first fragment, reserve header_len */
1082                         *mtu = *mtu - rt->dst.header_len;
1083
1084                 } else {
1085                         /*
1086                          * this fragment is not first, the headers
1087                          * space is regarded as data space.
1088                          */
1089                         *mtu = min(*mtu, pmtuprobe ?
1090                                    rt->dst.dev->mtu :
1091                                    dst_mtu(rt->dst.path));
1092                 }
1093                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1094                               + fragheaderlen - sizeof(struct frag_hdr);
1095         }
1096 }
1097
1098 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1099         int offset, int len, int odd, struct sk_buff *skb),
1100         void *from, int length, int transhdrlen,
1101         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1102         struct rt6_info *rt, unsigned int flags, int dontfrag)
1103 {
1104         struct inet_sock *inet = inet_sk(sk);
1105         struct ipv6_pinfo *np = inet6_sk(sk);
1106         struct inet_cork *cork;
1107         struct sk_buff *skb, *skb_prev = NULL;
1108         unsigned int maxfraglen, fragheaderlen, mtu;
1109         int exthdrlen;
1110         int dst_exthdrlen;
1111         int hh_len;
1112         int copy;
1113         int err;
1114         int offset = 0;
1115         __u8 tx_flags = 0;
1116
1117         if (flags&MSG_PROBE)
1118                 return 0;
1119         cork = &inet->cork.base;
1120         if (skb_queue_empty(&sk->sk_write_queue)) {
1121                 /*
1122                  * setup for corking
1123                  */
1124                 if (opt) {
1125                         if (WARN_ON(np->cork.opt))
1126                                 return -EINVAL;
1127
1128                         np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1129                         if (unlikely(np->cork.opt == NULL))
1130                                 return -ENOBUFS;
1131
1132                         np->cork.opt->tot_len = opt->tot_len;
1133                         np->cork.opt->opt_flen = opt->opt_flen;
1134                         np->cork.opt->opt_nflen = opt->opt_nflen;
1135
1136                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1137                                                             sk->sk_allocation);
1138                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1139                                 return -ENOBUFS;
1140
1141                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1142                                                             sk->sk_allocation);
1143                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1144                                 return -ENOBUFS;
1145
1146                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1147                                                            sk->sk_allocation);
1148                         if (opt->hopopt && !np->cork.opt->hopopt)
1149                                 return -ENOBUFS;
1150
1151                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1152                                                             sk->sk_allocation);
1153                         if (opt->srcrt && !np->cork.opt->srcrt)
1154                                 return -ENOBUFS;
1155
1156                         /* need source address above miyazawa*/
1157                 }
1158                 dst_hold(&rt->dst);
1159                 cork->dst = &rt->dst;
1160                 inet->cork.fl.u.ip6 = *fl6;
1161                 np->cork.hop_limit = hlimit;
1162                 np->cork.tclass = tclass;
1163                 if (rt->dst.flags & DST_XFRM_TUNNEL)
1164                         mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1165                               rt->dst.dev->mtu : dst_mtu(&rt->dst);
1166                 else
1167                         mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1168                               rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1169                 if (np->frag_size < mtu) {
1170                         if (np->frag_size)
1171                                 mtu = np->frag_size;
1172                 }
1173                 cork->fragsize = mtu;
1174                 if (dst_allfrag(rt->dst.path))
1175                         cork->flags |= IPCORK_ALLFRAG;
1176                 cork->length = 0;
1177                 exthdrlen = (opt ? opt->opt_flen : 0);
1178                 length += exthdrlen;
1179                 transhdrlen += exthdrlen;
1180                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1181         } else {
1182                 rt = (struct rt6_info *)cork->dst;
1183                 fl6 = &inet->cork.fl.u.ip6;
1184                 opt = np->cork.opt;
1185                 transhdrlen = 0;
1186                 exthdrlen = 0;
1187                 dst_exthdrlen = 0;
1188                 mtu = cork->fragsize;
1189         }
1190
1191         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1192
1193         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1194                         (opt ? opt->opt_nflen : 0);
1195         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1196
1197         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1198                 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1199                         ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1200                         return -EMSGSIZE;
1201                 }
1202         }
1203
1204         /* For UDP, check if TX timestamp is enabled */
1205         if (sk->sk_type == SOCK_DGRAM)
1206                 sock_tx_timestamp(sk, &tx_flags);
1207
1208         /*
1209          * Let's try using as much space as possible.
1210          * Use MTU if total length of the message fits into the MTU.
1211          * Otherwise, we need to reserve fragment header and
1212          * fragment alignment (= 8-15 octects, in total).
1213          *
1214          * Note that we may need to "move" the data from the tail of
1215          * of the buffer to the new fragment when we split
1216          * the message.
1217          *
1218          * FIXME: It may be fragmented into multiple chunks
1219          *        at once if non-fragmentable extension headers
1220          *        are too large.
1221          * --yoshfuji
1222          */
1223
1224         if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
1225                                            sk->sk_protocol == IPPROTO_RAW)) {
1226                 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1227                 return -EMSGSIZE;
1228         }
1229
1230         skb = skb_peek_tail(&sk->sk_write_queue);
1231         cork->length += length;
1232         if (((length > mtu) ||
1233              (skb && skb_is_gso(skb))) &&
1234             (sk->sk_protocol == IPPROTO_UDP) &&
1235             (rt->dst.dev->features & NETIF_F_UFO)) {
1236                 err = ip6_ufo_append_data(sk, getfrag, from, length,
1237                                           hh_len, fragheaderlen,
1238                                           transhdrlen, mtu, flags, rt);
1239                 if (err)
1240                         goto error;
1241                 return 0;
1242         }
1243
1244         if (!skb)
1245                 goto alloc_new_skb;
1246
1247         while (length > 0) {
1248                 /* Check if the remaining data fits into current packet. */
1249                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1250                 if (copy < length)
1251                         copy = maxfraglen - skb->len;
1252
1253                 if (copy <= 0) {
1254                         char *data;
1255                         unsigned int datalen;
1256                         unsigned int fraglen;
1257                         unsigned int fraggap;
1258                         unsigned int alloclen;
1259 alloc_new_skb:
1260                         /* There's no room in the current skb */
1261                         if (skb)
1262                                 fraggap = skb->len - maxfraglen;
1263                         else
1264                                 fraggap = 0;
1265                         /* update mtu and maxfraglen if necessary */
1266                         if (skb == NULL || skb_prev == NULL)
1267                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1268                                                     fragheaderlen, skb, rt,
1269                                                     np->pmtudisc ==
1270                                                     IPV6_PMTUDISC_PROBE);
1271
1272                         skb_prev = skb;
1273
1274                         /*
1275                          * If remaining data exceeds the mtu,
1276                          * we know we need more fragment(s).
1277                          */
1278                         datalen = length + fraggap;
1279
1280                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1281                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1282                         if ((flags & MSG_MORE) &&
1283                             !(rt->dst.dev->features&NETIF_F_SG))
1284                                 alloclen = mtu;
1285                         else
1286                                 alloclen = datalen + fragheaderlen;
1287
1288                         alloclen += dst_exthdrlen;
1289
1290                         if (datalen != length + fraggap) {
1291                                 /*
1292                                  * this is not the last fragment, the trailer
1293                                  * space is regarded as data space.
1294                                  */
1295                                 datalen += rt->dst.trailer_len;
1296                         }
1297
1298                         alloclen += rt->dst.trailer_len;
1299                         fraglen = datalen + fragheaderlen;
1300
1301                         /*
1302                          * We just reserve space for fragment header.
1303                          * Note: this may be overallocation if the message
1304                          * (without MSG_MORE) fits into the MTU.
1305                          */
1306                         alloclen += sizeof(struct frag_hdr);
1307
1308                         if (transhdrlen) {
1309                                 skb = sock_alloc_send_skb(sk,
1310                                                 alloclen + hh_len,
1311                                                 (flags & MSG_DONTWAIT), &err);
1312                         } else {
1313                                 skb = NULL;
1314                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1315                                     2 * sk->sk_sndbuf)
1316                                         skb = sock_wmalloc(sk,
1317                                                            alloclen + hh_len, 1,
1318                                                            sk->sk_allocation);
1319                                 if (unlikely(skb == NULL))
1320                                         err = -ENOBUFS;
1321                                 else {
1322                                         /* Only the initial fragment
1323                                          * is time stamped.
1324                                          */
1325                                         tx_flags = 0;
1326                                 }
1327                         }
1328                         if (skb == NULL)
1329                                 goto error;
1330                         /*
1331                          *      Fill in the control structures
1332                          */
1333                         skb->protocol = htons(ETH_P_IPV6);
1334                         skb->ip_summed = CHECKSUM_NONE;
1335                         skb->csum = 0;
1336                         /* reserve for fragmentation and ipsec header */
1337                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1338                                     dst_exthdrlen);
1339
1340                         if (sk->sk_type == SOCK_DGRAM)
1341                                 skb_shinfo(skb)->tx_flags = tx_flags;
1342
1343                         /*
1344                          *      Find where to start putting bytes
1345                          */
1346                         data = skb_put(skb, fraglen);
1347                         skb_set_network_header(skb, exthdrlen);
1348                         data += fragheaderlen;
1349                         skb->transport_header = (skb->network_header +
1350                                                  fragheaderlen);
1351                         if (fraggap) {
1352                                 skb->csum = skb_copy_and_csum_bits(
1353                                         skb_prev, maxfraglen,
1354                                         data + transhdrlen, fraggap, 0);
1355                                 skb_prev->csum = csum_sub(skb_prev->csum,
1356                                                           skb->csum);
1357                                 data += fraggap;
1358                                 pskb_trim_unique(skb_prev, maxfraglen);
1359                         }
1360                         copy = datalen - transhdrlen - fraggap;
1361
1362                         if (copy < 0) {
1363                                 err = -EINVAL;
1364                                 kfree_skb(skb);
1365                                 goto error;
1366                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1367                                 err = -EFAULT;
1368                                 kfree_skb(skb);
1369                                 goto error;
1370                         }
1371
1372                         offset += copy;
1373                         length -= datalen - fraggap;
1374                         transhdrlen = 0;
1375                         exthdrlen = 0;
1376                         dst_exthdrlen = 0;
1377
1378                         /*
1379                          * Put the packet on the pending queue
1380                          */
1381                         __skb_queue_tail(&sk->sk_write_queue, skb);
1382                         continue;
1383                 }
1384
1385                 if (copy > length)
1386                         copy = length;
1387
1388                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1389                         unsigned int off;
1390
1391                         off = skb->len;
1392                         if (getfrag(from, skb_put(skb, copy),
1393                                                 offset, copy, off, skb) < 0) {
1394                                 __skb_trim(skb, off);
1395                                 err = -EFAULT;
1396                                 goto error;
1397                         }
1398                 } else {
1399                         int i = skb_shinfo(skb)->nr_frags;
1400                         struct page_frag *pfrag = sk_page_frag(sk);
1401
1402                         err = -ENOMEM;
1403                         if (!sk_page_frag_refill(sk, pfrag))
1404                                 goto error;
1405
1406                         if (!skb_can_coalesce(skb, i, pfrag->page,
1407                                               pfrag->offset)) {
1408                                 err = -EMSGSIZE;
1409                                 if (i == MAX_SKB_FRAGS)
1410                                         goto error;
1411
1412                                 __skb_fill_page_desc(skb, i, pfrag->page,
1413                                                      pfrag->offset, 0);
1414                                 skb_shinfo(skb)->nr_frags = ++i;
1415                                 get_page(pfrag->page);
1416                         }
1417                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1418                         if (getfrag(from,
1419                                     page_address(pfrag->page) + pfrag->offset,
1420                                     offset, copy, skb->len, skb) < 0)
1421                                 goto error_efault;
1422
1423                         pfrag->offset += copy;
1424                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1425                         skb->len += copy;
1426                         skb->data_len += copy;
1427                         skb->truesize += copy;
1428                         atomic_add(copy, &sk->sk_wmem_alloc);
1429                 }
1430                 offset += copy;
1431                 length -= copy;
1432         }
1433
1434         return 0;
1435
1436 error_efault:
1437         err = -EFAULT;
1438 error:
1439         cork->length -= length;
1440         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1441         return err;
1442 }
1443 EXPORT_SYMBOL_GPL(ip6_append_data);
1444
1445 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1446 {
1447         if (np->cork.opt) {
1448                 kfree(np->cork.opt->dst0opt);
1449                 kfree(np->cork.opt->dst1opt);
1450                 kfree(np->cork.opt->hopopt);
1451                 kfree(np->cork.opt->srcrt);
1452                 kfree(np->cork.opt);
1453                 np->cork.opt = NULL;
1454         }
1455
1456         if (inet->cork.base.dst) {
1457                 dst_release(inet->cork.base.dst);
1458                 inet->cork.base.dst = NULL;
1459                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1460         }
1461         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1462 }
1463
1464 int ip6_push_pending_frames(struct sock *sk)
1465 {
1466         struct sk_buff *skb, *tmp_skb;
1467         struct sk_buff **tail_skb;
1468         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1469         struct inet_sock *inet = inet_sk(sk);
1470         struct ipv6_pinfo *np = inet6_sk(sk);
1471         struct net *net = sock_net(sk);
1472         struct ipv6hdr *hdr;
1473         struct ipv6_txoptions *opt = np->cork.opt;
1474         struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1475         struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1476         unsigned char proto = fl6->flowi6_proto;
1477         int err = 0;
1478
1479         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1480                 goto out;
1481         tail_skb = &(skb_shinfo(skb)->frag_list);
1482
1483         /* move skb->data to ip header from ext header */
1484         if (skb->data < skb_network_header(skb))
1485                 __skb_pull(skb, skb_network_offset(skb));
1486         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1487                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1488                 *tail_skb = tmp_skb;
1489                 tail_skb = &(tmp_skb->next);
1490                 skb->len += tmp_skb->len;
1491                 skb->data_len += tmp_skb->len;
1492                 skb->truesize += tmp_skb->truesize;
1493                 tmp_skb->destructor = NULL;
1494                 tmp_skb->sk = NULL;
1495         }
1496
1497         /* Allow local fragmentation. */
1498         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1499                 skb->local_df = 1;
1500
1501         *final_dst = fl6->daddr;
1502         __skb_pull(skb, skb_network_header_len(skb));
1503         if (opt && opt->opt_flen)
1504                 ipv6_push_frag_opts(skb, opt, &proto);
1505         if (opt && opt->opt_nflen)
1506                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1507
1508         skb_push(skb, sizeof(struct ipv6hdr));
1509         skb_reset_network_header(skb);
1510         hdr = ipv6_hdr(skb);
1511
1512         ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1513         hdr->hop_limit = np->cork.hop_limit;
1514         hdr->nexthdr = proto;
1515         hdr->saddr = fl6->saddr;
1516         hdr->daddr = *final_dst;
1517
1518         skb->priority = sk->sk_priority;
1519         skb->mark = sk->sk_mark;
1520
1521         skb_dst_set(skb, dst_clone(&rt->dst));
1522         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1523         if (proto == IPPROTO_ICMPV6) {
1524                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1525
1526                 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1527                 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1528         }
1529
1530         err = ip6_local_out(skb);
1531         if (err) {
1532                 if (err > 0)
1533                         err = net_xmit_errno(err);
1534                 if (err)
1535                         goto error;
1536         }
1537
1538 out:
1539         ip6_cork_release(inet, np);
1540         return err;
1541 error:
1542         IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1543         goto out;
1544 }
1545 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1546
1547 void ip6_flush_pending_frames(struct sock *sk)
1548 {
1549         struct sk_buff *skb;
1550
1551         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1552                 if (skb_dst(skb))
1553                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1554                                       IPSTATS_MIB_OUTDISCARDS);
1555                 kfree_skb(skb);
1556         }
1557
1558         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1559 }
1560 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);