Documentation: embargoed-hardware-issues.rst: Add myself for Power
[sfrench/cifs-2.6.git] / net / bridge / netfilter / nf_conntrack_bridge.c
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/types.h>
3 #include <linux/ip.h>
4 #include <linux/netfilter.h>
5 #include <linux/netfilter_ipv6.h>
6 #include <linux/netfilter_bridge.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/icmp.h>
10 #include <linux/sysctl.h>
11 #include <net/route.h>
12 #include <net/ip.h>
13
14 #include <net/netfilter/nf_conntrack.h>
15 #include <net/netfilter/nf_conntrack_core.h>
16 #include <net/netfilter/nf_conntrack_helper.h>
17 #include <net/netfilter/nf_conntrack_bridge.h>
18
19 #include <linux/netfilter/nf_tables.h>
20 #include <net/netfilter/nf_tables.h>
21
22 #include "../br_private.h"
23
/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
 * has been linearized or cloned.
 *
 * Fragments @skb and feeds every fragment to @output.  On unrecoverable
 * errors the skb is dropped ("blackhole") and 0 is returned, so the
 * caller always treats the packet as consumed.
 */
static int nf_br_ip_fragment(struct net *net, struct sock *sk,
			     struct sk_buff *skb,
			     struct nf_bridge_frag_data *data,
			     int (*output)(struct net *, struct sock *sk,
					   const struct nf_bridge_frag_data *data,
					   struct sk_buff *))
{
	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
	bool mono_delivery_time = skb->mono_delivery_time;
	unsigned int hlen, ll_rs, mtu;
	ktime_t tstamp = skb->tstamp;
	struct ip_frag_state state;
	struct iphdr *iph;
	int err = 0;

	/* for offloaded checksums cleanup checksum before fragmentation */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto blackhole;

	iph = ip_hdr(skb);

	/*
	 *	Setup starting values
	 */

	hlen = iph->ihl * 4;
	frag_max_size -= hlen;
	ll_rs = LL_RESERVED_SPACE(skb->dev);
	mtu = skb->dev->mtu;

	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip_fraglist_iter iter;
		struct sk_buff *frag;

		/* Head segment over the MTU or too little headroom for the
		 * link layer: geometry cannot be preserved, drop.
		 */
		if (first_len - hlen > mtu ||
		    skb_headroom(skb) < ll_rs)
			goto blackhole;

		/* A cloned head shares its data area; do not edit headers
		 * in place, take the copying slow path instead.
		 */
		if (skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			if (frag->len > mtu ||
			    skb_headroom(frag) < hlen + ll_rs)
				goto blackhole;

			if (skb_shared(frag))
				goto slow_path;
		}

		/* Fast path: send each frag_list member out as its own
		 * fragment, preserving the original geometry.
		 */
		ip_fraglist_init(skb, iph, hlen, &iter);

		for (;;) {
			if (iter.frag)
				ip_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, mono_delivery_time);
			err = output(net, sk, data, skb);
			if (err || !iter.frag)
				break;

			skb = ip_fraglist_next(&iter);
		}

		if (!err)
			return 0;

		/* output() failed mid-walk: free the not-yet-sent fragments */
		kfree_skb_list(iter.frag);

		return err;
	}
slow_path:
	/* This is a linearized skbuff, the original geometry is lost for us.
	 * This may also be a clone skbuff, we could preserve the geometry for
	 * the copies but probably not worth the effort.
	 */
	ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);

	while (state.left > 0) {
		struct sk_buff *skb2;

		skb2 = ip_frag_next(skb, &state);
		if (IS_ERR(skb2)) {
			err = PTR_ERR(skb2);
			goto blackhole;
		}

		skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
		err = output(net, sk, data, skb2);
		if (err)
			goto blackhole;
	}
	consume_skb(skb);
	return err;

blackhole:
	kfree_skb(skb);
	return 0;
}
128
129 /* ip_defrag() expects IPCB() in place. */
130 static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
131                            size_t inet_skb_parm_size)
132 {
133         memcpy(cb, skb->cb, sizeof(*cb));
134         memset(skb->cb, 0, inet_skb_parm_size);
135 }
136
137 static void br_skb_cb_restore(struct sk_buff *skb,
138                               const struct br_input_skb_cb *cb,
139                               u16 fragsz)
140 {
141         memcpy(skb->cb, cb, sizeof(*cb));
142         BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
143 }
144
/* Reassemble IPv4 fragments before conntrack sees the packet.
 *
 * Returns NF_ACCEPT when the skb is not a fragment or reassembly is
 * complete, NF_STOLEN otherwise (ip_defrag() has queued or freed it).
 */
static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
	enum ip_conntrack_info ctinfo;
	struct br_input_skb_cb cb;
	const struct nf_conn *ct;
	int err;

	if (!ip_is_fragment(ip_hdr(skb)))
		return NF_ACCEPT;

	/* Use the zone of the attached conntrack (template), so fragments
	 * belonging to different zones are reassembled separately.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct)
		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

	br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
	local_bh_disable();
	err = ip_defrag(state->net, skb,
			IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
	local_bh_enable();
	if (!err) {
		br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
		/* let the reassembled packet be refragmented on egress */
		skb->ignore_df = 1;
		return NF_ACCEPT;
	}

	/* skb queued (or dropped) by the defrag engine */
	return NF_STOLEN;
}
174
/* Reassemble IPv6 fragments before conntrack sees the packet.
 *
 * Returns NF_ACCEPT when reassembly completed (or when IPv6 defrag is
 * not built in), NF_STOLEN while fragments are queued, NF_DROP on error.
 */
static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
	u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
	enum ip_conntrack_info ctinfo;
	struct br_input_skb_cb cb;
	const struct nf_conn *ct;
	int err;

	/* Use the zone of the attached conntrack (template), so fragments
	 * belonging to different zones are reassembled separately.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct)
		zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));

	br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));

	err = nf_ct_frag6_gather(state->net, skb,
				 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
	/* queued */
	if (err == -EINPROGRESS)
		return NF_STOLEN;

	br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
	return err == 0 ? NF_ACCEPT : NF_DROP;
#else
	return NF_ACCEPT;
#endif
}
203
204 static int nf_ct_br_ip_check(const struct sk_buff *skb)
205 {
206         const struct iphdr *iph;
207         int nhoff, len;
208
209         nhoff = skb_network_offset(skb);
210         iph = ip_hdr(skb);
211         if (iph->ihl < 5 ||
212             iph->version != 4)
213                 return -1;
214
215         len = skb_ip_totlen(skb);
216         if (skb->len < nhoff + len ||
217             len < (iph->ihl * 4))
218                 return -1;
219
220         return 0;
221 }
222
223 static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
224 {
225         const struct ipv6hdr *hdr;
226         int nhoff, len;
227
228         nhoff = skb_network_offset(skb);
229         hdr = ipv6_hdr(skb);
230         if (hdr->version != 6)
231                 return -1;
232
233         len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
234         if (skb->len < len)
235                 return -1;
236
237         return 0;
238 }
239
/* NF_BR_PRE_ROUTING hook: validate the packet, trim link layer padding,
 * reassemble fragments and run it through nf_conntrack_in() with the
 * hook state rewritten to the matching inet protocol family.
 */
static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
				     const struct nf_hook_state *state)
{
	struct nf_hook_state bridge_state = *state;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	u32 len;
	int ret;

	/* Skip packets already carrying a real conntrack entry or marked
	 * untracked; a template conntrack still goes through the lookup.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if ((ct && !nf_ct_is_template(ct)) ||
	    ctinfo == IP_CT_UNTRACKED)
		return NF_ACCEPT;

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			return NF_ACCEPT;

		/* trim the skb to the IP total length (drops eth padding) */
		len = skb_ip_totlen(skb);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ip_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV4;
		ret = nf_ct_br_defrag4(skb, &bridge_state);
		break;
	case htons(ETH_P_IPV6):
		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			return NF_ACCEPT;

		/* trim the skb to the IPv6 header plus payload length */
		len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
		if (pskb_trim_rcsum(skb, len))
			return NF_ACCEPT;

		if (nf_ct_br_ipv6_check(skb))
			return NF_ACCEPT;

		bridge_state.pf = NFPROTO_IPV6;
		ret = nf_ct_br_defrag6(skb, &bridge_state);
		break;
	default:
		/* non-IP traffic is never tracked on the bridge */
		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
		return NF_ACCEPT;
	}

	/* NF_STOLEN/NF_DROP from the defrag step ends processing here */
	if (ret != NF_ACCEPT)
		return ret;

	return nf_conntrack_in(skb, &bridge_state);
}
293
/* NF_BR_LOCAL_IN hook: detach the unconfirmed conntrack from frames
 * that are not PACKET_HOST, so the inet path can redo the lookup.
 */
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
				    const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	if (skb->pkt_type == PACKET_HOST)
		return NF_ACCEPT;

	/* nf_conntrack_confirm() cannot handle concurrent clones,
	 * this happens for broad/multicast frames with e.g. macvlan on top
	 * of the bridge device.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
		return NF_ACCEPT;

	/* let inet prerouting call conntrack again */
	skb->_nfct = 0;
	nf_ct_put(ct);

	return NF_ACCEPT;
}
317
318 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
319                                    struct nf_bridge_frag_data *data)
320 {
321         if (skb_vlan_tag_present(skb)) {
322                 data->vlan_present = true;
323                 data->vlan_tci = skb->vlan_tci;
324                 data->vlan_proto = skb->vlan_proto;
325         } else {
326                 data->vlan_present = false;
327         }
328         skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
329 }
330
/* Refragment a packet that was defragmented on ingress, emitting each
 * fragment through @output with the saved link layer data attached.
 *
 * Returns NF_ACCEPT when no refragmentation is needed (frag_max_size
 * is zero), NF_STOLEN once the fragments were handed to @output, and
 * NF_DROP for an unexpected protocol.
 */
static unsigned int
nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
		    int (*output)(struct net *, struct sock *sk,
				  const struct nf_bridge_frag_data *data,
				  struct sk_buff *))
{
	struct nf_bridge_frag_data data;

	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
		return NF_ACCEPT;

	nf_ct_bridge_frag_save(skb, &data);
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
		break;
	case htons(ETH_P_IPV6):
		nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
		break;
	default:
		/* only IP/IPv6 can reach here via nf_ct_bridge_pre() */
		WARN_ON_ONCE(1);
		return NF_DROP;
	}

	return NF_STOLEN;
}
357
/* Actually only slow path refragmentation needs this. */
/* Rewrite the saved Ethernet header (and VLAN tag, if present) in front
 * of @skb.  On failure the skb is freed and -ENOMEM returned, so the
 * caller must not touch it again.
 */
static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
				     const struct nf_bridge_frag_data *data)
{
	int err;

	/* make sure the headroom we write ETH_HLEN into is ours */
	err = skb_cow_head(skb, ETH_HLEN);
	if (err) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	if (data->vlan_present)
		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
	else if (skb_vlan_tag_present(skb))
		__vlan_hwaccel_clear_tag(skb);

	skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
	skb_reset_mac_header(skb);

	return 0;
}
379
/* Per-fragment output callback: restore the saved link layer data and
 * push the fragment out through the bridge transmit path.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int err = nf_ct_bridge_frag_restore(skb, data);

	/* on restore failure the skb has already been freed */
	return err < 0 ? err : br_dev_queue_push_xmit(net, sk, skb);
}
392
393 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
394                                       const struct nf_hook_state *state)
395 {
396         int ret;
397
398         ret = nf_confirm(priv, skb, state);
399         if (ret != NF_ACCEPT)
400                 return ret;
401
402         return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
403 }
404
/* Bridge-family netfilter hooks implementing conntrack for bridged
 * traffic; registered via nf_ct_bridge_register() below.
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		/* sanitize, defrag and run the conntrack lookup */
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		/* detach conntrack from frames also seen by the inet stack */
		.hook		= nf_ct_bridge_in,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_LOCAL_IN,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
	{
		/* confirm the entry and refragment on egress */
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
425
/* Registration descriptor handed to the conntrack core. */
static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};
431
432 static int __init nf_conntrack_l3proto_bridge_init(void)
433 {
434         nf_ct_bridge_register(&bridge_info);
435
436         return 0;
437 }
438
/* Module exit: withdraw the bridge hook set from the conntrack core. */
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
	nf_ct_bridge_unregister(&bridge_info);
}
443
module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

/* autoload this module when a bridge-family conntrack is requested */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking");