/* net/netfilter/nf_flow_table.c */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
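/* An offloaded flow is bundled with the conntrack entry it shadows and an
 * rcu_head, so the whole allocation can be released after a grace period.
 */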
struct flow_offload_entry {
        struct flow_offload     flow;
        struct nf_conn          *ct;
        struct rcu_head         rcu_head;
};
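/* Build a flow entry from a conntrack entry and the routes for both
 * directions. Grabs its own reference on the conntrack entry and on both
 * dst entries; every reference taken is dropped again on the error paths.
 * Returns NULL if the conntrack entry is dying or an allocation or
 * reference acquisition fails.
 */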
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
        struct flow_offload_entry *entry;
        struct flow_offload *flow;

        if (unlikely(nf_ct_is_dying(ct) ||
            !atomic_inc_not_zero(&ct->ct_general.use)))
                return NULL;

        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry)
                goto err_ct_refcnt;

        flow = &entry->flow;

        if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
                goto err_dst_cache_original;

        if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
                goto err_dst_cache_reply;

        entry->ct = ct;

        switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
        case NFPROTO_IPV4:
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
                break;
        case NFPROTO_IPV6:
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
                flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
                        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
                flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
                        ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
                break;
        }

        /* Both directions carry the same l3/l4 protocol numbers. */
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
                  route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
                  route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
                ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
                ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
                ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
                                                FLOW_OFFLOAD_DIR_ORIGINAL;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
                                                FLOW_OFFLOAD_DIR_REPLY;

        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
        flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
                route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
                route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
        flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
                route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;

        if (ct->status & IPS_SRC_NAT)
                flow->flags |= FLOW_OFFLOAD_SNAT;
        /* A conntrack entry can be SNAT and DNAT at the same time. */
        if (ct->status & IPS_DST_NAT)
                flow->flags |= FLOW_OFFLOAD_DNAT;

        return flow;

err_dst_cache_reply:
        dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
        kfree(entry);
err_ct_refcnt:
        nf_ct_put(ct);

        return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);
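/* Drop the dst references taken at allocation time, mark the conntrack
 * entry for deletion and put our reference on it, then free the entry
 * after an RCU grace period so concurrent lookups remain safe.
 */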
void flow_offload_free(struct flow_offload *flow)
{
        struct flow_offload_entry *e;

        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
        dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
        e = container_of(flow, struct flow_offload_entry, flow);
        nf_ct_delete(e->ct, 0, 0);
        nf_ct_put(e->ct);
        kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

void flow_offload_dead(struct flow_offload *flow)
{
        flow->flags |= FLOW_OFFLOAD_DYING;
}
EXPORT_SYMBOL_GPL(flow_offload_dead);
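/* Insert both directions of the flow into the table. The timeout is
 * stamped with the current jiffies; the packet path is expected to push
 * it forward as long as traffic keeps hitting the flow, otherwise the
 * garbage collector will reap it.
 */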
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
        flow->timeout = (u32)jiffies;

        rhashtable_insert_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                               *flow_table->type->params);
        rhashtable_insert_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               *flow_table->type->params);
        return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

static void flow_offload_del(struct nf_flowtable *flow_table,
                             struct flow_offload *flow)
{
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
                               *flow_table->type->params);
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
                               *flow_table->type->params);

        flow_offload_free(flow);
}

struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
                    struct flow_offload_tuple *tuple)
{
        return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
                                      *flow_table->type->params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
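/* Walk the table and invoke @iter once per flow. Both directions of a
 * flow live in the table, so tuples with a non-zero direction are skipped
 * to visit each flow exactly once. -EAGAIN from the walker only signals a
 * concurrent resize and is not treated as an error.
 */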
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
                          void (*iter)(struct flow_offload *flow, void *data),
                          void *data)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
        int err;

        err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
        if (err)
                return err;

        rhashtable_walk_start(&hti);

        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
                        err = PTR_ERR(tuplehash);
                        if (err != -EAGAIN)
                                goto out;

                        continue;
                }
                if (tuplehash->tuple.dir)
                        continue;

                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

                iter(flow, data);
        }
out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);

        return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
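/* Wrap-safe timeout comparison on 32-bit jiffies, same idea as the
 * time_after() idiom.
 */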
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
        return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
        return flow->flags & FLOW_OFFLOAD_DYING;
}
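/* One garbage collection pass: remove flows that have expired or have
 * been marked as dying. Returns 0 only if the walker could not be set up,
 * which nf_flow_table_free() turns into a warning.
 */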
static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
{
        struct flow_offload_tuple_rhash *tuplehash;
        struct rhashtable_iter hti;
        struct flow_offload *flow;
        int err;

        err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
        if (err)
                return 0;

        rhashtable_walk_start(&hti);

        while ((tuplehash = rhashtable_walk_next(&hti))) {
                if (IS_ERR(tuplehash)) {
                        err = PTR_ERR(tuplehash);
                        if (err != -EAGAIN)
                                goto out;

                        continue;
                }
                if (tuplehash->tuple.dir)
                        continue;

                flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

                if (nf_flow_has_expired(flow) ||
                    nf_flow_is_dying(flow))
                        flow_offload_del(flow_table, flow);
        }
out:
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);

        return 1;
}
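/* Periodic garbage collection, rearming itself once per second on the
 * power-efficient workqueue.
 */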
void nf_flow_offload_work_gc(struct work_struct *work)
{
        struct nf_flowtable *flow_table;

        flow_table = container_of(work, struct nf_flowtable, gc_work.work);
        nf_flow_offload_gc_step(flow_table);
        queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
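/* Hashing and comparison cover the tuple only up to the 'dir' member:
 * everything in front of it forms the lookup key, while 'dir' and the
 * members behind it (oifidx, dst_cache, ...) do not affect placement in
 * the table.
 */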
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple *tuple = data;

        return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
        const struct flow_offload_tuple_rhash *tuplehash = data;

        return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
                                        const void *ptr)
{
        const struct flow_offload_tuple *tuple = arg->key;
        const struct flow_offload_tuple_rhash *x = ptr;

        if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
                return 1;

        return 0;
}

const struct rhashtable_params nf_flow_offload_rhash_params = {
        .head_offset            = offsetof(struct flow_offload_tuple_rhash, node),
        .hashfn                 = flow_offload_hash,
        .obj_hashfn             = flow_offload_hash_obj,
        .obj_cmpfn              = flow_offload_hash_cmp,
        .automatic_shrinking    = true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);
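/* Layer 4 port mangling with checksum fixup via inet_proto_csum_replace2().
 * A zero UDP checksum means "no checksum" and is left untouched unless the
 * skb is CHECKSUM_PARTIAL; a recomputed checksum of zero is folded to
 * CSUM_MANGLED_0, as usual for UDP.
 */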
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
{
        struct tcphdr *tcph;

        if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*tcph)))
                return -1;

        tcph = (void *)(skb_network_header(skb) + thoff);
        inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

        return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
                                __be16 port, __be16 new_port)
{
        struct udphdr *udph;

        if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
            skb_try_make_writable(skb, thoff + sizeof(*udph)))
                return -1;

        udph = (void *)(skb_network_header(skb) + thoff);
        if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
                inet_proto_csum_replace2(&udph->check, skb, port,
                                         new_port, true);
                if (!udph->check)
                        udph->check = CSUM_MANGLED_0;
        }

        return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
                            u8 protocol, __be16 port, __be16 new_port)
{
        switch (protocol) {
        case IPPROTO_TCP:
                if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
                        return NF_DROP;
                break;
        case IPPROTO_UDP:
                if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
                        return NF_DROP;
                break;
        }

        return 0;
}
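/* Rewrite the source (SNAT) or destination (DNAT) port depending on the
 * flow direction: packets in the original direction take the port recorded
 * in the reply tuple and vice versa, then nf_flow_nat_port() fixes up the
 * transport checksum.
 */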
int nf_flow_snat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
                      u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
            skb_try_make_writable(skb, thoff + sizeof(*hdr)))
                return -1;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
                hdr->source = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
                hdr->dest = new_port;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

int nf_flow_dnat_port(const struct flow_offload *flow,
                      struct sk_buff *skb, unsigned int thoff,
                      u8 protocol, enum flow_offload_tuple_dir dir)
{
        struct flow_ports *hdr;
        __be16 port, new_port;

        if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
            skb_try_make_writable(skb, thoff + sizeof(*hdr)))
                return -1;

        hdr = (void *)(skb_network_header(skb) + thoff);

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = hdr->dest;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
                hdr->dest = new_port;
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = hdr->source;
                new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
                hdr->source = new_port;
                break;
        default:
                return -1;
        }

        return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
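/* Teardown helpers: flag matching flows as dying and let the garbage
 * collector reap them. nf_flow_table_cleanup() handles a single netdevice
 * going away and flushes the pending GC work before returning.
 */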
static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
{
        struct net_device *dev = data;

        if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
                return;

        flow_offload_dead(flow);
}

static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
                                          void *data)
{
        nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
        flush_delayed_work(&flowtable->gc_work);
}

void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
{
        nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
}
EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
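/* Final teardown of a flow table: mark every remaining flow as dying and
 * run one synchronous GC step to release them all.
 */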
void nf_flow_table_free(struct nf_flowtable *flow_table)
{
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
        WARN_ON(!nf_flow_offload_gc_step(flow_table));
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");