Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[sfrench/cifs-2.6.git] / net / sched / act_csum.c
1 /*
2  * Checksum updating actions
3  *
4  * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option)
9  * any later version.
10  *
11  */
12
13 #include <linux/types.h>
14 #include <linux/init.h>
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 #include <linux/spinlock.h>
18
19 #include <linux/netlink.h>
20 #include <net/netlink.h>
21 #include <linux/rtnetlink.h>
22
23 #include <linux/skbuff.h>
24
25 #include <net/ip.h>
26 #include <net/ipv6.h>
27 #include <net/icmp.h>
28 #include <linux/icmpv6.h>
29 #include <linux/igmp.h>
30 #include <net/tcp.h>
31 #include <net/udp.h>
32 #include <net/ip6_checksum.h>
33
34 #include <net/act_api.h>
35
36 #include <linux/tc_act/tc_csum.h>
37 #include <net/tc_act/tc_csum.h>
38
/* Mask value handed to tcf_register_action() when the module registers. */
#define CSUM_TAB_MASK 0xf
40
41 static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
42         [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
43 };
44
45 static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
46                          struct tc_action *a, int ovr, int bind)
47 {
48         struct nlattr *tb[TCA_CSUM_MAX + 1];
49         struct tc_csum *parm;
50         struct tcf_csum *p;
51         int ret = 0, err;
52
53         if (nla == NULL)
54                 return -EINVAL;
55
56         err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
57         if (err < 0)
58                 return err;
59
60         if (tb[TCA_CSUM_PARMS] == NULL)
61                 return -EINVAL;
62         parm = nla_data(tb[TCA_CSUM_PARMS]);
63
64         if (!tcf_hash_check(parm->index, a, bind)) {
65                 ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
66                 if (ret)
67                         return ret;
68                 ret = ACT_P_CREATED;
69         } else {
70                 if (bind)/* dont override defaults */
71                         return 0;
72                 tcf_hash_release(a, bind);
73                 if (!ovr)
74                         return -EEXIST;
75         }
76
77         p = to_tcf_csum(a);
78         spin_lock_bh(&p->tcf_lock);
79         p->tcf_action = parm->action;
80         p->update_flags = parm->update_flags;
81         spin_unlock_bh(&p->tcf_lock);
82
83         if (ret == ACT_P_CREATED)
84                 tcf_hash_insert(a);
85
86         return ret;
87 }
88
89 /**
90  * tcf_csum_skb_nextlayer - Get next layer pointer
91  * @skb: sk_buff to use
92  * @ihl: previous summed headers length
93  * @ipl: complete packet length
94  * @jhl: next header length
95  *
96  * Check the expected next layer availability in the specified sk_buff.
97  * Return the next layer pointer if pass, NULL otherwise.
98  */
99 static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
100                                     unsigned int ihl, unsigned int ipl,
101                                     unsigned int jhl)
102 {
103         int ntkoff = skb_network_offset(skb);
104         int hl = ihl + jhl;
105
106         if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
107             (skb_cloned(skb) &&
108              !skb_clone_writable(skb, hl + ntkoff) &&
109              pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
110                 return NULL;
111         else
112                 return (void *)(skb_network_header(skb) + ihl);
113 }
114
115 static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
116                               unsigned int ihl, unsigned int ipl)
117 {
118         struct icmphdr *icmph;
119
120         icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
121         if (icmph == NULL)
122                 return 0;
123
124         icmph->checksum = 0;
125         skb->csum = csum_partial(icmph, ipl - ihl, 0);
126         icmph->checksum = csum_fold(skb->csum);
127
128         skb->ip_summed = CHECKSUM_NONE;
129
130         return 1;
131 }
132
133 static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
134                               unsigned int ihl, unsigned int ipl)
135 {
136         struct igmphdr *igmph;
137
138         igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
139         if (igmph == NULL)
140                 return 0;
141
142         igmph->csum = 0;
143         skb->csum = csum_partial(igmph, ipl - ihl, 0);
144         igmph->csum = csum_fold(skb->csum);
145
146         skb->ip_summed = CHECKSUM_NONE;
147
148         return 1;
149 }
150
151 static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
152                               unsigned int ihl, unsigned int ipl)
153 {
154         struct icmp6hdr *icmp6h;
155         const struct ipv6hdr *ip6h;
156
157         icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
158         if (icmp6h == NULL)
159                 return 0;
160
161         ip6h = ipv6_hdr(skb);
162         icmp6h->icmp6_cksum = 0;
163         skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
164         icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
165                                               ipl - ihl, IPPROTO_ICMPV6,
166                                               skb->csum);
167
168         skb->ip_summed = CHECKSUM_NONE;
169
170         return 1;
171 }
172
173 static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
174                              unsigned int ihl, unsigned int ipl)
175 {
176         struct tcphdr *tcph;
177         const struct iphdr *iph;
178
179         tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
180         if (tcph == NULL)
181                 return 0;
182
183         iph = ip_hdr(skb);
184         tcph->check = 0;
185         skb->csum = csum_partial(tcph, ipl - ihl, 0);
186         tcph->check = tcp_v4_check(ipl - ihl,
187                                    iph->saddr, iph->daddr, skb->csum);
188
189         skb->ip_summed = CHECKSUM_NONE;
190
191         return 1;
192 }
193
194 static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
195                              unsigned int ihl, unsigned int ipl)
196 {
197         struct tcphdr *tcph;
198         const struct ipv6hdr *ip6h;
199
200         tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
201         if (tcph == NULL)
202                 return 0;
203
204         ip6h = ipv6_hdr(skb);
205         tcph->check = 0;
206         skb->csum = csum_partial(tcph, ipl - ihl, 0);
207         tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
208                                       ipl - ihl, IPPROTO_TCP,
209                                       skb->csum);
210
211         skb->ip_summed = CHECKSUM_NONE;
212
213         return 1;
214 }
215
216 static int tcf_csum_ipv4_udp(struct sk_buff *skb,
217                              unsigned int ihl, unsigned int ipl, int udplite)
218 {
219         struct udphdr *udph;
220         const struct iphdr *iph;
221         u16 ul;
222
223         /*
224          * Support both UDP and UDPLITE checksum algorithms, Don't use
225          * udph->len to get the real length without any protocol check,
226          * UDPLITE uses udph->len for another thing,
227          * Use iph->tot_len, or just ipl.
228          */
229
230         udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
231         if (udph == NULL)
232                 return 0;
233
234         iph = ip_hdr(skb);
235         ul = ntohs(udph->len);
236
237         if (udplite || udph->check) {
238
239                 udph->check = 0;
240
241                 if (udplite) {
242                         if (ul == 0)
243                                 skb->csum = csum_partial(udph, ipl - ihl, 0);
244                         else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
245                                 skb->csum = csum_partial(udph, ul, 0);
246                         else
247                                 goto ignore_obscure_skb;
248                 } else {
249                         if (ul != ipl - ihl)
250                                 goto ignore_obscure_skb;
251
252                         skb->csum = csum_partial(udph, ul, 0);
253                 }
254
255                 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
256                                                 ul, iph->protocol,
257                                                 skb->csum);
258
259                 if (!udph->check)
260                         udph->check = CSUM_MANGLED_0;
261         }
262
263         skb->ip_summed = CHECKSUM_NONE;
264
265 ignore_obscure_skb:
266         return 1;
267 }
268
269 static int tcf_csum_ipv6_udp(struct sk_buff *skb,
270                              unsigned int ihl, unsigned int ipl, int udplite)
271 {
272         struct udphdr *udph;
273         const struct ipv6hdr *ip6h;
274         u16 ul;
275
276         /*
277          * Support both UDP and UDPLITE checksum algorithms, Don't use
278          * udph->len to get the real length without any protocol check,
279          * UDPLITE uses udph->len for another thing,
280          * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
281          */
282
283         udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
284         if (udph == NULL)
285                 return 0;
286
287         ip6h = ipv6_hdr(skb);
288         ul = ntohs(udph->len);
289
290         udph->check = 0;
291
292         if (udplite) {
293                 if (ul == 0)
294                         skb->csum = csum_partial(udph, ipl - ihl, 0);
295
296                 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
297                         skb->csum = csum_partial(udph, ul, 0);
298
299                 else
300                         goto ignore_obscure_skb;
301         } else {
302                 if (ul != ipl - ihl)
303                         goto ignore_obscure_skb;
304
305                 skb->csum = csum_partial(udph, ul, 0);
306         }
307
308         udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
309                                       udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
310                                       skb->csum);
311
312         if (!udph->check)
313                 udph->check = CSUM_MANGLED_0;
314
315         skb->ip_summed = CHECKSUM_NONE;
316
317 ignore_obscure_skb:
318         return 1;
319 }
320
321 static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
322 {
323         const struct iphdr *iph;
324         int ntkoff;
325
326         ntkoff = skb_network_offset(skb);
327
328         if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
329                 goto fail;
330
331         iph = ip_hdr(skb);
332
333         switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
334         case IPPROTO_ICMP:
335                 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
336                         if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
337                                                 ntohs(iph->tot_len)))
338                                 goto fail;
339                 break;
340         case IPPROTO_IGMP:
341                 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
342                         if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
343                                                 ntohs(iph->tot_len)))
344                                 goto fail;
345                 break;
346         case IPPROTO_TCP:
347                 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
348                         if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,
349                                                ntohs(iph->tot_len)))
350                                 goto fail;
351                 break;
352         case IPPROTO_UDP:
353                 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
354                         if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
355                                                ntohs(iph->tot_len), 0))
356                                 goto fail;
357                 break;
358         case IPPROTO_UDPLITE:
359                 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
360                         if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
361                                                ntohs(iph->tot_len), 1))
362                                 goto fail;
363                 break;
364         }
365
366         if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
367                 if (skb_cloned(skb) &&
368                     !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
369                     pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
370                         goto fail;
371
372                 ip_send_check(ip_hdr(skb));
373         }
374
375         return 1;
376
377 fail:
378         return 0;
379 }
380
381 static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
382                                  unsigned int ixhl, unsigned int *pl)
383 {
384         int off, len, optlen;
385         unsigned char *xh = (void *)ip6xh;
386
387         off = sizeof(*ip6xh);
388         len = ixhl - off;
389
390         while (len > 1) {
391                 switch (xh[off]) {
392                 case IPV6_TLV_PAD1:
393                         optlen = 1;
394                         break;
395                 case IPV6_TLV_JUMBO:
396                         optlen = xh[off + 1] + 2;
397                         if (optlen != 6 || len < 6 || (off & 3) != 2)
398                                 /* wrong jumbo option length/alignment */
399                                 return 0;
400                         *pl = ntohl(*(__be32 *)(xh + off + 2));
401                         goto done;
402                 default:
403                         optlen = xh[off + 1] + 2;
404                         if (optlen > len)
405                                 /* ignore obscure options */
406                                 goto done;
407                         break;
408                 }
409                 off += optlen;
410                 len -= optlen;
411         }
412
413 done:
414         return 1;
415 }
416
417 static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
418 {
419         struct ipv6hdr *ip6h;
420         struct ipv6_opt_hdr *ip6xh;
421         unsigned int hl, ixhl;
422         unsigned int pl;
423         int ntkoff;
424         u8 nexthdr;
425
426         ntkoff = skb_network_offset(skb);
427
428         hl = sizeof(*ip6h);
429
430         if (!pskb_may_pull(skb, hl + ntkoff))
431                 goto fail;
432
433         ip6h = ipv6_hdr(skb);
434
435         pl = ntohs(ip6h->payload_len);
436         nexthdr = ip6h->nexthdr;
437
438         do {
439                 switch (nexthdr) {
440                 case NEXTHDR_FRAGMENT:
441                         goto ignore_skb;
442                 case NEXTHDR_ROUTING:
443                 case NEXTHDR_HOP:
444                 case NEXTHDR_DEST:
445                         if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
446                                 goto fail;
447                         ip6xh = (void *)(skb_network_header(skb) + hl);
448                         ixhl = ipv6_optlen(ip6xh);
449                         if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
450                                 goto fail;
451                         ip6xh = (void *)(skb_network_header(skb) + hl);
452                         if ((nexthdr == NEXTHDR_HOP) &&
453                             !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
454                                 goto fail;
455                         nexthdr = ip6xh->nexthdr;
456                         hl += ixhl;
457                         break;
458                 case IPPROTO_ICMPV6:
459                         if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
460                                 if (!tcf_csum_ipv6_icmp(skb,
461                                                         hl, pl + sizeof(*ip6h)))
462                                         goto fail;
463                         goto done;
464                 case IPPROTO_TCP:
465                         if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
466                                 if (!tcf_csum_ipv6_tcp(skb,
467                                                        hl, pl + sizeof(*ip6h)))
468                                         goto fail;
469                         goto done;
470                 case IPPROTO_UDP:
471                         if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
472                                 if (!tcf_csum_ipv6_udp(skb, hl,
473                                                        pl + sizeof(*ip6h), 0))
474                                         goto fail;
475                         goto done;
476                 case IPPROTO_UDPLITE:
477                         if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
478                                 if (!tcf_csum_ipv6_udp(skb, hl,
479                                                        pl + sizeof(*ip6h), 1))
480                                         goto fail;
481                         goto done;
482                 default:
483                         goto ignore_skb;
484                 }
485         } while (pskb_may_pull(skb, hl + 1 + ntkoff));
486
487 done:
488 ignore_skb:
489         return 1;
490
491 fail:
492         return 0;
493 }
494
495 static int tcf_csum(struct sk_buff *skb,
496                     const struct tc_action *a, struct tcf_result *res)
497 {
498         struct tcf_csum *p = a->priv;
499         int action;
500         u32 update_flags;
501
502         spin_lock(&p->tcf_lock);
503         p->tcf_tm.lastuse = jiffies;
504         bstats_update(&p->tcf_bstats, skb);
505         action = p->tcf_action;
506         update_flags = p->update_flags;
507         spin_unlock(&p->tcf_lock);
508
509         if (unlikely(action == TC_ACT_SHOT))
510                 goto drop;
511
512         switch (skb->protocol) {
513         case cpu_to_be16(ETH_P_IP):
514                 if (!tcf_csum_ipv4(skb, update_flags))
515                         goto drop;
516                 break;
517         case cpu_to_be16(ETH_P_IPV6):
518                 if (!tcf_csum_ipv6(skb, update_flags))
519                         goto drop;
520                 break;
521         }
522
523         return action;
524
525 drop:
526         spin_lock(&p->tcf_lock);
527         p->tcf_qstats.drops++;
528         spin_unlock(&p->tcf_lock);
529         return TC_ACT_SHOT;
530 }
531
532 static int tcf_csum_dump(struct sk_buff *skb,
533                          struct tc_action *a, int bind, int ref)
534 {
535         unsigned char *b = skb_tail_pointer(skb);
536         struct tcf_csum *p = a->priv;
537         struct tc_csum opt = {
538                 .update_flags = p->update_flags,
539                 .index   = p->tcf_index,
540                 .action  = p->tcf_action,
541                 .refcnt  = p->tcf_refcnt - ref,
542                 .bindcnt = p->tcf_bindcnt - bind,
543         };
544         struct tcf_t t;
545
546         if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
547                 goto nla_put_failure;
548         t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
549         t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
550         t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
551         if (nla_put(skb, TCA_CSUM_TM, sizeof(t), &t))
552                 goto nla_put_failure;
553
554         return skb->len;
555
556 nla_put_failure:
557         nlmsg_trim(skb, b);
558         return -1;
559 }
560
561 static struct tc_action_ops act_csum_ops = {
562         .kind           = "csum",
563         .type           = TCA_ACT_CSUM,
564         .owner          = THIS_MODULE,
565         .act            = tcf_csum,
566         .dump           = tcf_csum_dump,
567         .init           = tcf_csum_init,
568 };
569
570 MODULE_DESCRIPTION("Checksum updating actions");
571 MODULE_LICENSE("GPL");
572
573 static int __init csum_init_module(void)
574 {
575         return tcf_register_action(&act_csum_ops, CSUM_TAB_MASK);
576 }
577
578 static void __exit csum_cleanup_module(void)
579 {
580         tcf_unregister_action(&act_csum_ops);
581 }
582
583 module_init(csum_init_module);
584 module_exit(csum_cleanup_module);